root / hw / misc / vfio.c @ a8aec295
History | View | Annotate | Download (98.9 kB)
1 | 65501a74 | Alex Williamson | /*
|
---|---|---|---|
2 | 65501a74 | Alex Williamson | * vfio based device assignment support
|
3 | 65501a74 | Alex Williamson | *
|
4 | 65501a74 | Alex Williamson | * Copyright Red Hat, Inc. 2012
|
5 | 65501a74 | Alex Williamson | *
|
6 | 65501a74 | Alex Williamson | * Authors:
|
7 | 65501a74 | Alex Williamson | * Alex Williamson <alex.williamson@redhat.com>
|
8 | 65501a74 | Alex Williamson | *
|
9 | 65501a74 | Alex Williamson | * This work is licensed under the terms of the GNU GPL, version 2. See
|
10 | 65501a74 | Alex Williamson | * the COPYING file in the top-level directory.
|
11 | 65501a74 | Alex Williamson | *
|
12 | 65501a74 | Alex Williamson | * Based on qemu-kvm device-assignment:
|
13 | 65501a74 | Alex Williamson | * Adapted for KVM by Qumranet.
|
14 | 65501a74 | Alex Williamson | * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
|
15 | 65501a74 | Alex Williamson | * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
|
16 | 65501a74 | Alex Williamson | * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
|
17 | 65501a74 | Alex Williamson | * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
|
18 | 65501a74 | Alex Williamson | * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
|
19 | 65501a74 | Alex Williamson | */
|
20 | 65501a74 | Alex Williamson | |
21 | 65501a74 | Alex Williamson | #include <dirent.h> |
22 | 6dcfdbad | Alex Williamson | #include <linux/vfio.h> |
23 | 65501a74 | Alex Williamson | #include <sys/ioctl.h> |
24 | 65501a74 | Alex Williamson | #include <sys/mman.h> |
25 | 65501a74 | Alex Williamson | #include <sys/stat.h> |
26 | 65501a74 | Alex Williamson | #include <sys/types.h> |
27 | 6dcfdbad | Alex Williamson | #include <unistd.h> |
28 | 65501a74 | Alex Williamson | |
29 | 65501a74 | Alex Williamson | #include "config.h" |
30 | 022c62cb | Paolo Bonzini | #include "exec/address-spaces.h" |
31 | 022c62cb | Paolo Bonzini | #include "exec/memory.h" |
32 | 83c9f4ca | Paolo Bonzini | #include "hw/pci/msi.h" |
33 | 83c9f4ca | Paolo Bonzini | #include "hw/pci/msix.h" |
34 | 83c9f4ca | Paolo Bonzini | #include "hw/pci/pci.h" |
35 | 5c97e5eb | Alex Williamson | #include "qemu-common.h" |
36 | 1de7afc9 | Paolo Bonzini | #include "qemu/error-report.h" |
37 | 6dcfdbad | Alex Williamson | #include "qemu/event_notifier.h" |
38 | 1de7afc9 | Paolo Bonzini | #include "qemu/queue.h" |
39 | 1de7afc9 | Paolo Bonzini | #include "qemu/range.h" |
40 | 6dcfdbad | Alex Williamson | #include "sysemu/kvm.h" |
41 | 6dcfdbad | Alex Williamson | #include "sysemu/sysemu.h" |
42 | 65501a74 | Alex Williamson | |
/* #define DEBUG_VFIO */
#ifdef DEBUG_VFIO
/* Debug build: prefix every message with "vfio: " and send to stderr. */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0)
#else
/* Non-debug build: compiles to nothing, arguments are not evaluated. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

/* Extra debugging, trap acceleration paths for more logging */
/*
 * Set either to 0 to force the slow, fully-trapped path: no direct BAR
 * mmaps and no KVM INTx bypass.  Useful for tracing device accesses.
 */
#define VFIO_ALLOW_MMAP 1
#define VFIO_ALLOW_KVM_INTX 1
struct VFIODevice;

/*
 * A quirk is a special-case memory region overlaid on a BAR or VGA range
 * to intercept accesses that need emulation tweaks for a specific device.
 */
typedef struct VFIOQuirk {
    MemoryRegion mem;
    struct VFIODevice *vdev;        /* back pointer to owning device */
    QLIST_ENTRY(VFIOQuirk) next;
    uint32_t data;                  /* quirk-private state word */
    uint32_t data2;                 /* second quirk-private state word */
} VFIOQuirk;

/* Per-BAR state: trapped ("slow") region plus optional direct mmap. */
typedef struct VFIOBAR {
    off_t fd_offset; /* offset of BAR within device fd */
    int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
    MemoryRegion mem; /* slow, read/write access */
    MemoryRegion mmap_mem; /* direct mapped access */
    void *mmap;                     /* mmap'd pointer, NULL if not mapped */
    size_t size;
    uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
    uint8_t nr; /* cache the BAR number for debug */
    QLIST_HEAD(, VFIOQuirk) quirks;
} VFIOBAR;

/* One of the legacy VGA ranges (MMIO 0xa0000 or an I/O port window). */
typedef struct VFIOVGARegion {
    MemoryRegion mem;
    off_t offset;                   /* offset of this range within device fd */
    int nr;                         /* region index, see QEMU_PCI_VGA_* */
    QLIST_HEAD(, VFIOQuirk) quirks;
} VFIOVGARegion;

typedef struct VFIOVGA {
    off_t fd_offset;                /* base offset of VGA space in device fd */
    int fd;                         /* device fd for read/write access */
    VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS];
} VFIOVGA;

/* Legacy INTx (level-triggered) interrupt state. */
typedef struct VFIOINTx {
    bool pending; /* interrupt pending */
    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
    uint8_t pin; /* which pin to pull for qemu_set_irq */
    EventNotifier interrupt; /* eventfd triggered on interrupt */
    EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
    PCIINTxRoute route; /* routing info for QEMU bypass */
    uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
    QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
} VFIOINTx;

/* Per-vector MSI/MSI-X state. */
typedef struct VFIOMSIVector {
    EventNotifier interrupt; /* eventfd triggered on interrupt */
    struct VFIODevice *vdev; /* back pointer to device */
    int virq; /* KVM irqchip route for QEMU bypass */
    bool use;                       /* vector currently enabled */
} VFIOMSIVector;

/* Values for VFIODevice.interrupt: which interrupt mode is active. */
enum {
    VFIO_INT_NONE = 0,
    VFIO_INT_INTx = 1,
    VFIO_INT_MSI = 2,
    VFIO_INT_MSIX = 3,
};

struct VFIOGroup;

/* An IOMMU context shared by one or more groups mapping the same AS. */
typedef struct VFIOContainer {
    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
    struct {
        /* enable abstraction to support various iommu backends */
        union {
            MemoryListener listener; /* Used by type1 iommu */
        };
        void (*release)(struct VFIOContainer *);
    } iommu_data;
    QLIST_HEAD(, VFIOGroup) group_list;
    QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;

/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
typedef struct VFIOMSIXInfo {
    uint8_t table_bar;              /* BAR containing the MSI-X table */
    uint8_t pba_bar;                /* BAR containing the pending-bit array */
    uint16_t entries;               /* number of MSI-X table entries */
    uint32_t table_offset;          /* table offset within its BAR */
    uint32_t pba_offset;            /* PBA offset within its BAR */
    MemoryRegion mmap_mem;
    void *mmap;
} VFIOMSIXInfo;

/* State for one assigned PCI device; embeds the QEMU PCIDevice. */
typedef struct VFIODevice {
    PCIDevice pdev;
    int fd;                         /* VFIO device fd */
    VFIOINTx intx;
    unsigned int config_size;
    uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */
    off_t config_offset; /* Offset of config space region within device fd */
    unsigned int rom_size;
    off_t rom_offset; /* Offset of ROM region within device fd */
    int msi_cap_size;
    VFIOMSIVector *msi_vectors;
    VFIOMSIXInfo *msix;
    int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
    int interrupt; /* Current interrupt type */
    VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
    VFIOVGA vga; /* 0xa0000, 0x3b0, 0x3c0 */
    PCIHostDeviceAddress host;      /* host domain:bus:slot.function */
    QLIST_ENTRY(VFIODevice) next;
    struct VFIOGroup *group;
    uint32_t features;              /* VFIO_FEATURE_* bit mask */
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
    int32_t bootindex;
    uint8_t pm_cap;                 /* config offset of PM capability */
    bool reset_works;
    bool has_vga;
} VFIODevice;

/* A VFIO group: the unit of ownership, attached to a container. */
typedef struct VFIOGroup {
    int fd;                         /* /dev/vfio/<groupid> fd */
    int groupid;
    VFIOContainer *container;
    QLIST_HEAD(, VFIODevice) device_list;
    QLIST_ENTRY(VFIOGroup) next;
    QLIST_ENTRY(VFIOGroup) container_next;
} VFIOGroup;
178 | 5c97e5eb | Alex Williamson | |
/* Fixed size of an MSI-X capability in config space (PCI spec). */
#define MSIX_CAP_LENGTH 12

/* All live containers; containers are shared between compatible groups. */
static QLIST_HEAD(, VFIOContainer)
    container_list = QLIST_HEAD_INITIALIZER(container_list);

/* All open VFIO groups, so a group can be reused across devices. */
static QLIST_HEAD(, VFIOGroup)
    group_list = QLIST_HEAD_INITIALIZER(group_list);

/* Forward declarations for routines defined later in this file. */
static void vfio_disable_interrupts(VFIODevice *vdev);
static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
                                  uint32_t val, int len);
static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled);
192 | 65501a74 | Alex Williamson | |
193 | 65501a74 | Alex Williamson | /*
|
194 | 65501a74 | Alex Williamson | * Common VFIO interrupt disable
|
195 | 65501a74 | Alex Williamson | */
|
196 | 65501a74 | Alex Williamson | static void vfio_disable_irqindex(VFIODevice *vdev, int index) |
197 | 65501a74 | Alex Williamson | { |
198 | 65501a74 | Alex Williamson | struct vfio_irq_set irq_set = {
|
199 | 65501a74 | Alex Williamson | .argsz = sizeof(irq_set),
|
200 | 65501a74 | Alex Williamson | .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, |
201 | 65501a74 | Alex Williamson | .index = index, |
202 | 65501a74 | Alex Williamson | .start = 0,
|
203 | 65501a74 | Alex Williamson | .count = 0,
|
204 | 65501a74 | Alex Williamson | }; |
205 | 65501a74 | Alex Williamson | |
206 | 65501a74 | Alex Williamson | ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); |
207 | 65501a74 | Alex Williamson | } |
208 | 65501a74 | Alex Williamson | |
209 | 65501a74 | Alex Williamson | /*
|
210 | 65501a74 | Alex Williamson | * INTx
|
211 | 65501a74 | Alex Williamson | */
|
212 | 65501a74 | Alex Williamson | static void vfio_unmask_intx(VFIODevice *vdev) |
213 | 65501a74 | Alex Williamson | { |
214 | 65501a74 | Alex Williamson | struct vfio_irq_set irq_set = {
|
215 | 65501a74 | Alex Williamson | .argsz = sizeof(irq_set),
|
216 | 65501a74 | Alex Williamson | .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, |
217 | 65501a74 | Alex Williamson | .index = VFIO_PCI_INTX_IRQ_INDEX, |
218 | 65501a74 | Alex Williamson | .start = 0,
|
219 | 65501a74 | Alex Williamson | .count = 1,
|
220 | 65501a74 | Alex Williamson | }; |
221 | 65501a74 | Alex Williamson | |
222 | 65501a74 | Alex Williamson | ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); |
223 | 65501a74 | Alex Williamson | } |
224 | 65501a74 | Alex Williamson | |
#ifdef CONFIG_KVM /* Unused outside of CONFIG_KVM code */
/* Ask the kernel to mask the INTx interrupt at the VFIO level. */
static void vfio_mask_intx(VFIODevice *vdev)
{
    struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set) };

    irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
    irq_set.index = VFIO_PCI_INTX_IRQ_INDEX;
    irq_set.start = 0;
    irq_set.count = 1;

    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}
#endif
|
239 | e1d1e586 | Alex Williamson | |
240 | ea486926 | Alex Williamson | /*
|
241 | ea486926 | Alex Williamson | * Disabling BAR mmaping can be slow, but toggling it around INTx can
|
242 | ea486926 | Alex Williamson | * also be a huge overhead. We try to get the best of both worlds by
|
243 | ea486926 | Alex Williamson | * waiting until an interrupt to disable mmaps (subsequent transitions
|
244 | ea486926 | Alex Williamson | * to the same state are effectively no overhead). If the interrupt has
|
245 | ea486926 | Alex Williamson | * been serviced and the time gap is long enough, we re-enable mmaps for
|
246 | ea486926 | Alex Williamson | * performance. This works well for things like graphics cards, which
|
247 | ea486926 | Alex Williamson | * may not use their interrupt at all and are penalized to an unusable
|
248 | ea486926 | Alex Williamson | * level by read/write BAR traps. Other devices, like NICs, have more
|
249 | ea486926 | Alex Williamson | * regular interrupts and see much better latency by staying in non-mmap
|
250 | ea486926 | Alex Williamson | * mode. We therefore set the default mmap_timeout such that a ping
|
251 | ea486926 | Alex Williamson | * is just enough to keep the mmap disabled. Users can experiment with
|
252 | ea486926 | Alex Williamson | * other options with the x-intx-mmap-timeout-ms parameter (a value of
|
253 | ea486926 | Alex Williamson | * zero disables the timer).
|
254 | ea486926 | Alex Williamson | */
|
255 | ea486926 | Alex Williamson | static void vfio_intx_mmap_enable(void *opaque) |
256 | ea486926 | Alex Williamson | { |
257 | ea486926 | Alex Williamson | VFIODevice *vdev = opaque; |
258 | ea486926 | Alex Williamson | |
259 | ea486926 | Alex Williamson | if (vdev->intx.pending) {
|
260 | ea486926 | Alex Williamson | qemu_mod_timer(vdev->intx.mmap_timer, |
261 | ea486926 | Alex Williamson | qemu_get_clock_ms(vm_clock) + vdev->intx.mmap_timeout); |
262 | ea486926 | Alex Williamson | return;
|
263 | ea486926 | Alex Williamson | } |
264 | ea486926 | Alex Williamson | |
265 | ea486926 | Alex Williamson | vfio_mmap_set_enabled(vdev, true);
|
266 | ea486926 | Alex Williamson | } |
267 | ea486926 | Alex Williamson | |
/*
 * Handler for the INTx eventfd: forward a host interrupt to the guest
 * and throttle direct BAR mmaps while the interrupt is in flight.
 */
static void vfio_intx_interrupt(void *opaque)
{
    VFIODevice *vdev = opaque;

    /* Spurious wakeup: nothing was actually signalled on the eventfd. */
    if (!event_notifier_test_and_clear(&vdev->intx.interrupt)) {
        return;
    }

    DPRINTF("%s(%04x:%02x:%02x.%x) Pin %c\n", __func__, vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function,
            'A' + vdev->intx.pin);

    /* Assert the guest IRQ; vfio_eoi() de-asserts and unmasks later. */
    vdev->intx.pending = true;
    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 1);
    /* Trap BAR accesses so the guest's EOI can be observed. */
    vfio_mmap_set_enabled(vdev, false);
    if (vdev->intx.mmap_timeout) {
        /* Re-enable mmaps once interrupts quiesce (zero timeout = never). */
        qemu_mod_timer(vdev->intx.mmap_timer,
                       qemu_get_clock_ms(vm_clock) + vdev->intx.mmap_timeout);
    }
}
288 | 65501a74 | Alex Williamson | |
/*
 * Guest EOI for INTx: de-assert the virtual IRQ and tell VFIO to unmask
 * the physical interrupt so the device can fire again.
 */
static void vfio_eoi(VFIODevice *vdev)
{
    /* Nothing to do if no interrupt is outstanding. */
    if (!vdev->intx.pending) {
        return;
    }

    DPRINTF("%s(%04x:%02x:%02x.%x) EOI\n", __func__, vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function);

    vdev->intx.pending = false;
    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
    vfio_unmask_intx(vdev);
}
302 | 65501a74 | Alex Williamson | |
/*
 * Switch INTx delivery to the KVM bypass path: KVM injects the interrupt
 * from the VFIO eventfd directly and resamples (unmasks) via a second
 * eventfd, so QEMU drops out of the hot path entirely.  Falls back to the
 * QEMU-handled path on any failure.
 */
static void vfio_enable_intx_kvm(VFIODevice *vdev)
{
#ifdef CONFIG_KVM
    struct kvm_irqfd irqfd = {
        .fd = event_notifier_get_fd(&vdev->intx.interrupt),
        .gsi = vdev->intx.route.irq,
        .flags = KVM_IRQFD_FLAG_RESAMPLE,
    };
    struct vfio_irq_set *irq_set;
    int ret, argsz;
    int32_t *pfd;

    /* Bypass requires irqfds, a valid INTx route, and resample support. */
    if (!VFIO_ALLOW_KVM_INTX || !kvm_irqfds_enabled() ||
        vdev->intx.route.mode != PCI_INTX_ENABLED ||
        !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
        return;
    }

    /* Get to a known interrupt state */
    qemu_set_fd_handler(irqfd.fd, NULL, NULL, vdev);
    vfio_mask_intx(vdev);
    vdev->intx.pending = false;
    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);

    /* Get an eventfd for resample/unmask */
    if (event_notifier_init(&vdev->intx.unmask, 0)) {
        error_report("vfio: Error: event_notifier_init failed eoi");
        goto fail;
    }

    /* KVM triggers it, VFIO listens for it */
    irqfd.resamplefd = event_notifier_get_fd(&vdev->intx.unmask);

    /* Register trigger + resample eventfd pair with KVM. */
    if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) {
        error_report("vfio: Error: Failed to setup resample irqfd: %m");
        goto fail_irqfd;
    }

    /* Variable-length ioctl argument: header plus one eventfd. */
    argsz = sizeof(*irq_set) + sizeof(*pfd);

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
    irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
    irq_set->start = 0;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;

    /* Tell VFIO to signal the resample eventfd on hardware unmask. */
    *pfd = irqfd.resamplefd;

    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
    g_free(irq_set);
    if (ret) {
        error_report("vfio: Error: Failed to setup INTx unmask fd: %m");
        goto fail_vfio;
    }

    /* Let'em rip */
    vfio_unmask_intx(vdev);

    vdev->intx.kvm_accel = true;

    DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel enabled\n",
            __func__, vdev->host.domain, vdev->host.bus,
            vdev->host.slot, vdev->host.function);

    return;

fail_vfio:
    /* Undo the KVM_IRQFD registration made above. */
    irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
    kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd);
fail_irqfd:
    event_notifier_cleanup(&vdev->intx.unmask);
fail:
    /* Restore the QEMU-handled path and resume taking interrupts. */
    qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev);
    vfio_unmask_intx(vdev);
#endif
}
381 | e1d1e586 | Alex Williamson | |
/*
 * Tear down the KVM INTx bypass and return interrupt handling to QEMU.
 * No-op unless vfio_enable_intx_kvm() previously succeeded.
 */
static void vfio_disable_intx_kvm(VFIODevice *vdev)
{
#ifdef CONFIG_KVM
    struct kvm_irqfd irqfd = {
        .fd = event_notifier_get_fd(&vdev->intx.interrupt),
        .gsi = vdev->intx.route.irq,
        .flags = KVM_IRQFD_FLAG_DEASSIGN,
    };

    if (!vdev->intx.kvm_accel) {
        return;
    }

    /*
     * Get to a known state, hardware masked, QEMU ready to accept new
     * interrupts, QEMU IRQ de-asserted.
     */
    vfio_mask_intx(vdev);
    vdev->intx.pending = false;
    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);

    /* Tell KVM to stop listening for an INTx irqfd */
    if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) {
        error_report("vfio: Error: Failed to disable INTx irqfd: %m");
    }

    /* We only need to close the eventfd for VFIO to cleanup the kernel side */
    event_notifier_cleanup(&vdev->intx.unmask);

    /* QEMU starts listening for interrupt events. */
    qemu_set_fd_handler(irqfd.fd, vfio_intx_interrupt, NULL, vdev);

    vdev->intx.kvm_accel = false;

    /* If we've missed an event, let it re-fire through QEMU */
    vfio_unmask_intx(vdev);

    DPRINTF("%s(%04x:%02x:%02x.%x) KVM INTx accel disabled\n",
            __func__, vdev->host.domain, vdev->host.bus,
            vdev->host.slot, vdev->host.function);
#endif
}
424 | e1d1e586 | Alex Williamson | |
/*
 * PCI INTx routing update hook: when the guest reprograms the interrupt
 * routing, re-establish the KVM bypass against the new GSI.
 */
static void vfio_update_irq(PCIDevice *pdev)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    PCIINTxRoute route;

    /* Route changes only matter while INTx is the active interrupt mode. */
    if (vdev->interrupt != VFIO_INT_INTx) {
        return;
    }

    route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);

    if (!pci_intx_route_changed(&vdev->intx.route, &route)) {
        return; /* Nothing changed */
    }

    DPRINTF("%s(%04x:%02x:%02x.%x) IRQ moved %d -> %d\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, vdev->intx.route.irq, route.irq);

    /* Drop the old bypass before recording the new route. */
    vfio_disable_intx_kvm(vdev);

    vdev->intx.route = route;

    /* Route disabled: stay on the QEMU-handled path. */
    if (route.mode != PCI_INTX_ENABLED) {
        return;
    }

    vfio_enable_intx_kvm(vdev);

    /* Re-enable the interrupt in case we missed an EOI */
    vfio_eoi(vdev);
}
457 | e1d1e586 | Alex Williamson | |
458 | 65501a74 | Alex Williamson | static int vfio_enable_intx(VFIODevice *vdev) |
459 | 65501a74 | Alex Williamson | { |
460 | 65501a74 | Alex Williamson | uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
|
461 | 1a403133 | Alex Williamson | int ret, argsz;
|
462 | 1a403133 | Alex Williamson | struct vfio_irq_set *irq_set;
|
463 | 1a403133 | Alex Williamson | int32_t *pfd; |
464 | 65501a74 | Alex Williamson | |
465 | ea486926 | Alex Williamson | if (!pin) {
|
466 | 65501a74 | Alex Williamson | return 0; |
467 | 65501a74 | Alex Williamson | } |
468 | 65501a74 | Alex Williamson | |
469 | 65501a74 | Alex Williamson | vfio_disable_interrupts(vdev); |
470 | 65501a74 | Alex Williamson | |
471 | 65501a74 | Alex Williamson | vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ |
472 | e1d1e586 | Alex Williamson | |
473 | e1d1e586 | Alex Williamson | #ifdef CONFIG_KVM
|
474 | e1d1e586 | Alex Williamson | /*
|
475 | e1d1e586 | Alex Williamson | * Only conditional to avoid generating error messages on platforms
|
476 | e1d1e586 | Alex Williamson | * where we won't actually use the result anyway.
|
477 | e1d1e586 | Alex Williamson | */
|
478 | d281084d | Alex Williamson | if (kvm_irqfds_enabled() &&
|
479 | d281084d | Alex Williamson | kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) { |
480 | e1d1e586 | Alex Williamson | vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev, |
481 | e1d1e586 | Alex Williamson | vdev->intx.pin); |
482 | e1d1e586 | Alex Williamson | } |
483 | e1d1e586 | Alex Williamson | #endif
|
484 | e1d1e586 | Alex Williamson | |
485 | 65501a74 | Alex Williamson | ret = event_notifier_init(&vdev->intx.interrupt, 0);
|
486 | 65501a74 | Alex Williamson | if (ret) {
|
487 | 312fd5f2 | Markus Armbruster | error_report("vfio: Error: event_notifier_init failed");
|
488 | 65501a74 | Alex Williamson | return ret;
|
489 | 65501a74 | Alex Williamson | } |
490 | 65501a74 | Alex Williamson | |
491 | 1a403133 | Alex Williamson | argsz = sizeof(*irq_set) + sizeof(*pfd); |
492 | 1a403133 | Alex Williamson | |
493 | 1a403133 | Alex Williamson | irq_set = g_malloc0(argsz); |
494 | 1a403133 | Alex Williamson | irq_set->argsz = argsz; |
495 | 1a403133 | Alex Williamson | irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; |
496 | 1a403133 | Alex Williamson | irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; |
497 | 1a403133 | Alex Williamson | irq_set->start = 0;
|
498 | 1a403133 | Alex Williamson | irq_set->count = 1;
|
499 | 1a403133 | Alex Williamson | pfd = (int32_t *)&irq_set->data; |
500 | 1a403133 | Alex Williamson | |
501 | 1a403133 | Alex Williamson | *pfd = event_notifier_get_fd(&vdev->intx.interrupt); |
502 | 1a403133 | Alex Williamson | qemu_set_fd_handler(*pfd, vfio_intx_interrupt, NULL, vdev);
|
503 | 65501a74 | Alex Williamson | |
504 | 1a403133 | Alex Williamson | ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); |
505 | 1a403133 | Alex Williamson | g_free(irq_set); |
506 | 1a403133 | Alex Williamson | if (ret) {
|
507 | 312fd5f2 | Markus Armbruster | error_report("vfio: Error: Failed to setup INTx fd: %m");
|
508 | 1a403133 | Alex Williamson | qemu_set_fd_handler(*pfd, NULL, NULL, vdev); |
509 | ce59af2d | Alex Williamson | event_notifier_cleanup(&vdev->intx.interrupt); |
510 | 65501a74 | Alex Williamson | return -errno;
|
511 | 65501a74 | Alex Williamson | } |
512 | 65501a74 | Alex Williamson | |
513 | e1d1e586 | Alex Williamson | vfio_enable_intx_kvm(vdev); |
514 | e1d1e586 | Alex Williamson | |
515 | 65501a74 | Alex Williamson | vdev->interrupt = VFIO_INT_INTx; |
516 | 65501a74 | Alex Williamson | |
517 | 65501a74 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
|
518 | 65501a74 | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
519 | 65501a74 | Alex Williamson | |
520 | 65501a74 | Alex Williamson | return 0; |
521 | 65501a74 | Alex Williamson | } |
522 | 65501a74 | Alex Williamson | |
/*
 * Tear down INTx for the device and return it to a quiescent state.
 * Order matters here: stop the mmap re-enable timer and the KVM bypass
 * before disabling the host IRQ index, then clear any pending guest IRQ
 * and restore direct mmap access to the BARs.
 */
static void vfio_disable_intx(VFIODevice *vdev)
{
    int fd;

    /* Cancel any pending deferred mmap re-enable (armed on INTx EOI). */
    qemu_del_timer(vdev->intx.mmap_timer);
    /* Undo the KVM irqfd/resamplefd bypass, if it was in place. */
    vfio_disable_intx_kvm(vdev);
    /* Stop the host from triggering the INTx eventfd. */
    vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX);
    vdev->intx.pending = false;
    /* De-assert the guest interrupt line so nothing is left pending. */
    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
    /* With INTx off there is no need to trap BAR accesses any more. */
    vfio_mmap_set_enabled(vdev, true);

    /* Detach and destroy the INTx eventfd. */
    fd = event_notifier_get_fd(&vdev->intx.interrupt);
    qemu_set_fd_handler(fd, NULL, NULL, vdev);
    event_notifier_cleanup(&vdev->intx.interrupt);

    vdev->interrupt = VFIO_INT_NONE;

    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function);
}
543 | 65501a74 | Alex Williamson | |
544 | 65501a74 | Alex Williamson | /*
|
545 | 65501a74 | Alex Williamson | * MSI/X
|
546 | 65501a74 | Alex Williamson | */
|
547 | 65501a74 | Alex Williamson | static void vfio_msi_interrupt(void *opaque) |
548 | 65501a74 | Alex Williamson | { |
549 | 65501a74 | Alex Williamson | VFIOMSIVector *vector = opaque; |
550 | 65501a74 | Alex Williamson | VFIODevice *vdev = vector->vdev; |
551 | 65501a74 | Alex Williamson | int nr = vector - vdev->msi_vectors;
|
552 | 65501a74 | Alex Williamson | |
553 | 65501a74 | Alex Williamson | if (!event_notifier_test_and_clear(&vector->interrupt)) {
|
554 | 65501a74 | Alex Williamson | return;
|
555 | 65501a74 | Alex Williamson | } |
556 | 65501a74 | Alex Williamson | |
557 | 65501a74 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x) vector %d\n", __func__,
|
558 | 65501a74 | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
559 | 65501a74 | Alex Williamson | vdev->host.function, nr); |
560 | 65501a74 | Alex Williamson | |
561 | 65501a74 | Alex Williamson | if (vdev->interrupt == VFIO_INT_MSIX) {
|
562 | 65501a74 | Alex Williamson | msix_notify(&vdev->pdev, nr); |
563 | 65501a74 | Alex Williamson | } else if (vdev->interrupt == VFIO_INT_MSI) { |
564 | 65501a74 | Alex Williamson | msi_notify(&vdev->pdev, nr); |
565 | 65501a74 | Alex Williamson | } else {
|
566 | 312fd5f2 | Markus Armbruster | error_report("vfio: MSI interrupt receieved, but not enabled?");
|
567 | 65501a74 | Alex Williamson | } |
568 | 65501a74 | Alex Williamson | } |
569 | 65501a74 | Alex Williamson | |
570 | 65501a74 | Alex Williamson | static int vfio_enable_vectors(VFIODevice *vdev, bool msix) |
571 | 65501a74 | Alex Williamson | { |
572 | 65501a74 | Alex Williamson | struct vfio_irq_set *irq_set;
|
573 | 65501a74 | Alex Williamson | int ret = 0, i, argsz; |
574 | 65501a74 | Alex Williamson | int32_t *fds; |
575 | 65501a74 | Alex Williamson | |
576 | 65501a74 | Alex Williamson | argsz = sizeof(*irq_set) + (vdev->nr_vectors * sizeof(*fds)); |
577 | 65501a74 | Alex Williamson | |
578 | 65501a74 | Alex Williamson | irq_set = g_malloc0(argsz); |
579 | 65501a74 | Alex Williamson | irq_set->argsz = argsz; |
580 | 65501a74 | Alex Williamson | irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; |
581 | 65501a74 | Alex Williamson | irq_set->index = msix ? VFIO_PCI_MSIX_IRQ_INDEX : VFIO_PCI_MSI_IRQ_INDEX; |
582 | 65501a74 | Alex Williamson | irq_set->start = 0;
|
583 | 65501a74 | Alex Williamson | irq_set->count = vdev->nr_vectors; |
584 | 65501a74 | Alex Williamson | fds = (int32_t *)&irq_set->data; |
585 | 65501a74 | Alex Williamson | |
586 | 65501a74 | Alex Williamson | for (i = 0; i < vdev->nr_vectors; i++) { |
587 | 65501a74 | Alex Williamson | if (!vdev->msi_vectors[i].use) {
|
588 | 65501a74 | Alex Williamson | fds[i] = -1;
|
589 | 65501a74 | Alex Williamson | continue;
|
590 | 65501a74 | Alex Williamson | } |
591 | 65501a74 | Alex Williamson | |
592 | 65501a74 | Alex Williamson | fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt); |
593 | 65501a74 | Alex Williamson | } |
594 | 65501a74 | Alex Williamson | |
595 | 65501a74 | Alex Williamson | ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); |
596 | 65501a74 | Alex Williamson | |
597 | 65501a74 | Alex Williamson | g_free(irq_set); |
598 | 65501a74 | Alex Williamson | |
599 | 65501a74 | Alex Williamson | return ret;
|
600 | 65501a74 | Alex Williamson | } |
601 | 65501a74 | Alex Williamson | |
/*
 * Enable one MSI-X vector for the device.  Called both from the MSI-X
 * vector_use notifier (with a real message and vfio_msi_interrupt as the
 * handler) and directly from vfio_enable_msix() with msg == NULL/handler
 * == NULL to flip the physical device into MSI-X mode.
 *
 * Delivery is routed through the KVM irqchip (irqfd) when possible and
 * falls back to a userspace fd handler otherwise.
 */
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
                                   MSIMessage *msg, IOHandler *handler)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    VFIOMSIVector *vector;
    int ret;

    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d used\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, nr);

    vector = &vdev->msi_vectors[nr];
    vector->vdev = vdev;
    vector->use = true;

    msix_vector_use(pdev, nr);

    /* NOTE(review): init failure is only reported, not propagated. */
    if (event_notifier_init(&vector->interrupt, 0)) {
        error_report("vfio: Error: event_notifier_init failed");
    }

    /*
     * Attempt to enable route through KVM irqchip,
     * default to userspace handling if unavailable.
     */
    vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1;
    if (vector->virq < 0 ||
        kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
                                       vector->virq) < 0) {
        /* irqfd attach failed: release the route (if any) and fall back. */
        if (vector->virq >= 0) {
            kvm_irqchip_release_virq(kvm_state, vector->virq);
            vector->virq = -1;
        }
        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
                            handler, NULL, vector);
    }

    /*
     * We don't want to have the host allocate all possible MSI vectors
     * for a device if they're not in use, so we shutdown and incrementally
     * increase them as needed.
     */
    if (vdev->nr_vectors < nr + 1) {
        /* Growing: disable everything, then re-enable with the new count. */
        vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX);
        vdev->nr_vectors = nr + 1;
        ret = vfio_enable_vectors(vdev, true);
        if (ret) {
            error_report("vfio: failed to enable vectors, %d", ret);
        }
    } else {
        /* Vector already within range: update just this one eventfd. */
        int argsz;
        struct vfio_irq_set *irq_set;
        int32_t *pfd;

        argsz = sizeof(*irq_set) + sizeof(*pfd);

        irq_set = g_malloc0(argsz);
        irq_set->argsz = argsz;
        irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
                         VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
        irq_set->start = nr;
        irq_set->count = 1;
        pfd = (int32_t *)&irq_set->data;

        *pfd = event_notifier_get_fd(&vector->interrupt);

        ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
        g_free(irq_set);
        if (ret) {
            error_report("vfio: failed to modify vector, %d", ret);
        }
    }

    /* Always reports success to msix.c, even if the host calls failed. */
    return 0;
}
678 | 65501a74 | Alex Williamson | |
679 | b0223e29 | Alex Williamson | static int vfio_msix_vector_use(PCIDevice *pdev, |
680 | b0223e29 | Alex Williamson | unsigned int nr, MSIMessage msg) |
681 | b0223e29 | Alex Williamson | { |
682 | b0223e29 | Alex Williamson | return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt);
|
683 | b0223e29 | Alex Williamson | } |
684 | b0223e29 | Alex Williamson | |
685 | 65501a74 | Alex Williamson | static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) |
686 | 65501a74 | Alex Williamson | { |
687 | 65501a74 | Alex Williamson | VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); |
688 | 65501a74 | Alex Williamson | VFIOMSIVector *vector = &vdev->msi_vectors[nr]; |
689 | 1a403133 | Alex Williamson | int argsz;
|
690 | 1a403133 | Alex Williamson | struct vfio_irq_set *irq_set;
|
691 | 1a403133 | Alex Williamson | int32_t *pfd; |
692 | 65501a74 | Alex Williamson | |
693 | 65501a74 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
|
694 | 65501a74 | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
695 | 65501a74 | Alex Williamson | vdev->host.function, nr); |
696 | 65501a74 | Alex Williamson | |
697 | 65501a74 | Alex Williamson | /*
|
698 | 65501a74 | Alex Williamson | * XXX What's the right thing to do here? This turns off the interrupt
|
699 | 65501a74 | Alex Williamson | * completely, but do we really just want to switch the interrupt to
|
700 | 65501a74 | Alex Williamson | * bouncing through userspace and let msix.c drop it? Not sure.
|
701 | 65501a74 | Alex Williamson | */
|
702 | 65501a74 | Alex Williamson | msix_vector_unuse(pdev, nr); |
703 | 1a403133 | Alex Williamson | |
704 | 1a403133 | Alex Williamson | argsz = sizeof(*irq_set) + sizeof(*pfd); |
705 | 1a403133 | Alex Williamson | |
706 | 1a403133 | Alex Williamson | irq_set = g_malloc0(argsz); |
707 | 1a403133 | Alex Williamson | irq_set->argsz = argsz; |
708 | 1a403133 | Alex Williamson | irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | |
709 | 1a403133 | Alex Williamson | VFIO_IRQ_SET_ACTION_TRIGGER; |
710 | 1a403133 | Alex Williamson | irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; |
711 | 1a403133 | Alex Williamson | irq_set->start = nr; |
712 | 1a403133 | Alex Williamson | irq_set->count = 1;
|
713 | 1a403133 | Alex Williamson | pfd = (int32_t *)&irq_set->data; |
714 | 1a403133 | Alex Williamson | |
715 | 1a403133 | Alex Williamson | *pfd = -1;
|
716 | 1a403133 | Alex Williamson | |
717 | 1a403133 | Alex Williamson | ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); |
718 | 1a403133 | Alex Williamson | |
719 | 1a403133 | Alex Williamson | g_free(irq_set); |
720 | 65501a74 | Alex Williamson | |
721 | 65501a74 | Alex Williamson | if (vector->virq < 0) { |
722 | 65501a74 | Alex Williamson | qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), |
723 | 65501a74 | Alex Williamson | NULL, NULL, NULL); |
724 | 65501a74 | Alex Williamson | } else {
|
725 | 65501a74 | Alex Williamson | kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt, |
726 | 65501a74 | Alex Williamson | vector->virq); |
727 | 65501a74 | Alex Williamson | kvm_irqchip_release_virq(kvm_state, vector->virq); |
728 | 65501a74 | Alex Williamson | vector->virq = -1;
|
729 | 65501a74 | Alex Williamson | } |
730 | 65501a74 | Alex Williamson | |
731 | 65501a74 | Alex Williamson | event_notifier_cleanup(&vector->interrupt); |
732 | 65501a74 | Alex Williamson | vector->use = false;
|
733 | 65501a74 | Alex Williamson | } |
734 | 65501a74 | Alex Williamson | |
/*
 * Switch the device into MSI-X mode.  Allocates per-vector state for the
 * full MSI-X table and registers vector_use/release notifiers so vectors
 * are enabled on the host lazily, as the guest unmasks them.
 */
static void vfio_enable_msix(VFIODevice *vdev)
{
    vfio_disable_interrupts(vdev);

    /* One VFIOMSIVector slot per MSI-X table entry, zero-initialized. */
    vdev->msi_vectors = g_malloc0(vdev->msix->entries * sizeof(VFIOMSIVector));

    vdev->interrupt = VFIO_INT_MSIX;

    /*
     * Some communication channels between VF & PF or PF & fw rely on the
     * physical state of the device and expect that enabling MSI-X from the
     * guest enables the same on the host.  When our guest is Linux, the
     * guest driver call to pci_enable_msix() sets the enabling bit in the
     * MSI-X capability, but leaves the vector table masked.  We therefore
     * can't rely on a vector_use callback (from request_irq() in the guest)
     * to switch the physical device into MSI-X mode because that may come a
     * long time after pci_enable_msix().  This code enables vector 0 with
     * triggering to userspace, then immediately release the vector, leaving
     * the physical device with no vectors enabled, but MSI-X enabled, just
     * like the guest view.
     */
    vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
    vfio_msix_vector_release(&vdev->pdev, 0);

    /* From here on msix.c drives vector enable/disable via callbacks. */
    if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
                                  vfio_msix_vector_release, NULL)) {
        error_report("vfio: msix_set_vector_notifiers failed");
    }

    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function);
}
767 | fd704adc | Alex Williamson | |
768 | 65501a74 | Alex Williamson | static void vfio_enable_msi(VFIODevice *vdev) |
769 | 65501a74 | Alex Williamson | { |
770 | 65501a74 | Alex Williamson | int ret, i;
|
771 | 65501a74 | Alex Williamson | |
772 | 65501a74 | Alex Williamson | vfio_disable_interrupts(vdev); |
773 | 65501a74 | Alex Williamson | |
774 | 65501a74 | Alex Williamson | vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); |
775 | 65501a74 | Alex Williamson | retry:
|
776 | 65501a74 | Alex Williamson | vdev->msi_vectors = g_malloc0(vdev->nr_vectors * sizeof(VFIOMSIVector));
|
777 | 65501a74 | Alex Williamson | |
778 | 65501a74 | Alex Williamson | for (i = 0; i < vdev->nr_vectors; i++) { |
779 | 65501a74 | Alex Williamson | MSIMessage msg; |
780 | 65501a74 | Alex Williamson | VFIOMSIVector *vector = &vdev->msi_vectors[i]; |
781 | 65501a74 | Alex Williamson | |
782 | 65501a74 | Alex Williamson | vector->vdev = vdev; |
783 | 65501a74 | Alex Williamson | vector->use = true;
|
784 | 65501a74 | Alex Williamson | |
785 | 65501a74 | Alex Williamson | if (event_notifier_init(&vector->interrupt, 0)) { |
786 | 312fd5f2 | Markus Armbruster | error_report("vfio: Error: event_notifier_init failed");
|
787 | 65501a74 | Alex Williamson | } |
788 | 65501a74 | Alex Williamson | |
789 | a771c517 | Alex Williamson | msg = msi_get_message(&vdev->pdev, i); |
790 | 65501a74 | Alex Williamson | |
791 | 65501a74 | Alex Williamson | /*
|
792 | 65501a74 | Alex Williamson | * Attempt to enable route through KVM irqchip,
|
793 | 65501a74 | Alex Williamson | * default to userspace handling if unavailable.
|
794 | 65501a74 | Alex Williamson | */
|
795 | 65501a74 | Alex Williamson | vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg); |
796 | 65501a74 | Alex Williamson | if (vector->virq < 0 || |
797 | 65501a74 | Alex Williamson | kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt, |
798 | 65501a74 | Alex Williamson | vector->virq) < 0) {
|
799 | 65501a74 | Alex Williamson | qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), |
800 | 65501a74 | Alex Williamson | vfio_msi_interrupt, NULL, vector);
|
801 | 65501a74 | Alex Williamson | } |
802 | 65501a74 | Alex Williamson | } |
803 | 65501a74 | Alex Williamson | |
804 | 65501a74 | Alex Williamson | ret = vfio_enable_vectors(vdev, false);
|
805 | 65501a74 | Alex Williamson | if (ret) {
|
806 | 65501a74 | Alex Williamson | if (ret < 0) { |
807 | 312fd5f2 | Markus Armbruster | error_report("vfio: Error: Failed to setup MSI fds: %m");
|
808 | 65501a74 | Alex Williamson | } else if (ret != vdev->nr_vectors) { |
809 | 65501a74 | Alex Williamson | error_report("vfio: Error: Failed to enable %d "
|
810 | 312fd5f2 | Markus Armbruster | "MSI vectors, retry with %d", vdev->nr_vectors, ret);
|
811 | 65501a74 | Alex Williamson | } |
812 | 65501a74 | Alex Williamson | |
813 | 65501a74 | Alex Williamson | for (i = 0; i < vdev->nr_vectors; i++) { |
814 | 65501a74 | Alex Williamson | VFIOMSIVector *vector = &vdev->msi_vectors[i]; |
815 | 65501a74 | Alex Williamson | if (vector->virq >= 0) { |
816 | 65501a74 | Alex Williamson | kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt, |
817 | 65501a74 | Alex Williamson | vector->virq); |
818 | 65501a74 | Alex Williamson | kvm_irqchip_release_virq(kvm_state, vector->virq); |
819 | 65501a74 | Alex Williamson | vector->virq = -1;
|
820 | 65501a74 | Alex Williamson | } else {
|
821 | 65501a74 | Alex Williamson | qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), |
822 | 65501a74 | Alex Williamson | NULL, NULL, NULL); |
823 | 65501a74 | Alex Williamson | } |
824 | 65501a74 | Alex Williamson | event_notifier_cleanup(&vector->interrupt); |
825 | 65501a74 | Alex Williamson | } |
826 | 65501a74 | Alex Williamson | |
827 | 65501a74 | Alex Williamson | g_free(vdev->msi_vectors); |
828 | 65501a74 | Alex Williamson | |
829 | 65501a74 | Alex Williamson | if (ret > 0 && ret != vdev->nr_vectors) { |
830 | 65501a74 | Alex Williamson | vdev->nr_vectors = ret; |
831 | 65501a74 | Alex Williamson | goto retry;
|
832 | 65501a74 | Alex Williamson | } |
833 | 65501a74 | Alex Williamson | vdev->nr_vectors = 0;
|
834 | 65501a74 | Alex Williamson | |
835 | 65501a74 | Alex Williamson | return;
|
836 | 65501a74 | Alex Williamson | } |
837 | 65501a74 | Alex Williamson | |
838 | fd704adc | Alex Williamson | vdev->interrupt = VFIO_INT_MSI; |
839 | fd704adc | Alex Williamson | |
840 | 65501a74 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
|
841 | 65501a74 | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
842 | 65501a74 | Alex Williamson | vdev->host.function, vdev->nr_vectors); |
843 | 65501a74 | Alex Williamson | } |
844 | 65501a74 | Alex Williamson | |
845 | fd704adc | Alex Williamson | static void vfio_disable_msi_common(VFIODevice *vdev) |
846 | fd704adc | Alex Williamson | { |
847 | fd704adc | Alex Williamson | g_free(vdev->msi_vectors); |
848 | fd704adc | Alex Williamson | vdev->msi_vectors = NULL;
|
849 | fd704adc | Alex Williamson | vdev->nr_vectors = 0;
|
850 | fd704adc | Alex Williamson | vdev->interrupt = VFIO_INT_NONE; |
851 | fd704adc | Alex Williamson | |
852 | fd704adc | Alex Williamson | vfio_enable_intx(vdev); |
853 | fd704adc | Alex Williamson | } |
854 | fd704adc | Alex Williamson | |
855 | fd704adc | Alex Williamson | static void vfio_disable_msix(VFIODevice *vdev) |
856 | fd704adc | Alex Williamson | { |
857 | fd704adc | Alex Williamson | msix_unset_vector_notifiers(&vdev->pdev); |
858 | fd704adc | Alex Williamson | |
859 | fd704adc | Alex Williamson | if (vdev->nr_vectors) {
|
860 | fd704adc | Alex Williamson | vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX); |
861 | fd704adc | Alex Williamson | } |
862 | fd704adc | Alex Williamson | |
863 | fd704adc | Alex Williamson | vfio_disable_msi_common(vdev); |
864 | fd704adc | Alex Williamson | |
865 | a011b10e | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
|
866 | a011b10e | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
867 | fd704adc | Alex Williamson | } |
868 | fd704adc | Alex Williamson | |
869 | fd704adc | Alex Williamson | static void vfio_disable_msi(VFIODevice *vdev) |
870 | 65501a74 | Alex Williamson | { |
871 | 65501a74 | Alex Williamson | int i;
|
872 | 65501a74 | Alex Williamson | |
873 | fd704adc | Alex Williamson | vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX); |
874 | 65501a74 | Alex Williamson | |
875 | 65501a74 | Alex Williamson | for (i = 0; i < vdev->nr_vectors; i++) { |
876 | 65501a74 | Alex Williamson | VFIOMSIVector *vector = &vdev->msi_vectors[i]; |
877 | 65501a74 | Alex Williamson | |
878 | 65501a74 | Alex Williamson | if (!vector->use) {
|
879 | 65501a74 | Alex Williamson | continue;
|
880 | 65501a74 | Alex Williamson | } |
881 | 65501a74 | Alex Williamson | |
882 | 65501a74 | Alex Williamson | if (vector->virq >= 0) { |
883 | 65501a74 | Alex Williamson | kvm_irqchip_remove_irqfd_notifier(kvm_state, |
884 | 65501a74 | Alex Williamson | &vector->interrupt, vector->virq); |
885 | 65501a74 | Alex Williamson | kvm_irqchip_release_virq(kvm_state, vector->virq); |
886 | 65501a74 | Alex Williamson | vector->virq = -1;
|
887 | 65501a74 | Alex Williamson | } else {
|
888 | 65501a74 | Alex Williamson | qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), |
889 | 65501a74 | Alex Williamson | NULL, NULL, NULL); |
890 | 65501a74 | Alex Williamson | } |
891 | 65501a74 | Alex Williamson | |
892 | 65501a74 | Alex Williamson | event_notifier_cleanup(&vector->interrupt); |
893 | 65501a74 | Alex Williamson | } |
894 | 65501a74 | Alex Williamson | |
895 | fd704adc | Alex Williamson | vfio_disable_msi_common(vdev); |
896 | 65501a74 | Alex Williamson | |
897 | fd704adc | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
|
898 | fd704adc | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
899 | 65501a74 | Alex Williamson | } |
900 | 65501a74 | Alex Williamson | |
901 | 65501a74 | Alex Williamson | /*
|
902 | 65501a74 | Alex Williamson | * IO Port/MMIO - Beware of the endians, VFIO is always little endian
|
903 | 65501a74 | Alex Williamson | */
|
904 | a8170e5e | Avi Kivity | static void vfio_bar_write(void *opaque, hwaddr addr, |
905 | 65501a74 | Alex Williamson | uint64_t data, unsigned size)
|
906 | 65501a74 | Alex Williamson | { |
907 | 65501a74 | Alex Williamson | VFIOBAR *bar = opaque; |
908 | 65501a74 | Alex Williamson | union {
|
909 | 65501a74 | Alex Williamson | uint8_t byte; |
910 | 65501a74 | Alex Williamson | uint16_t word; |
911 | 65501a74 | Alex Williamson | uint32_t dword; |
912 | 65501a74 | Alex Williamson | uint64_t qword; |
913 | 65501a74 | Alex Williamson | } buf; |
914 | 65501a74 | Alex Williamson | |
915 | 65501a74 | Alex Williamson | switch (size) {
|
916 | 65501a74 | Alex Williamson | case 1: |
917 | 65501a74 | Alex Williamson | buf.byte = data; |
918 | 65501a74 | Alex Williamson | break;
|
919 | 65501a74 | Alex Williamson | case 2: |
920 | 65501a74 | Alex Williamson | buf.word = cpu_to_le16(data); |
921 | 65501a74 | Alex Williamson | break;
|
922 | 65501a74 | Alex Williamson | case 4: |
923 | 65501a74 | Alex Williamson | buf.dword = cpu_to_le32(data); |
924 | 65501a74 | Alex Williamson | break;
|
925 | 65501a74 | Alex Williamson | default:
|
926 | 65501a74 | Alex Williamson | hw_error("vfio: unsupported write size, %d bytes\n", size);
|
927 | 65501a74 | Alex Williamson | break;
|
928 | 65501a74 | Alex Williamson | } |
929 | 65501a74 | Alex Williamson | |
930 | 65501a74 | Alex Williamson | if (pwrite(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
|
931 | 312fd5f2 | Markus Armbruster | error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m", |
932 | 65501a74 | Alex Williamson | __func__, addr, data, size); |
933 | 65501a74 | Alex Williamson | } |
934 | 65501a74 | Alex Williamson | |
935 | 82ca8912 | Alex Williamson | #ifdef DEBUG_VFIO
|
936 | 82ca8912 | Alex Williamson | { |
937 | 82ca8912 | Alex Williamson | VFIODevice *vdev = container_of(bar, VFIODevice, bars[bar->nr]); |
938 | 82ca8912 | Alex Williamson | |
939 | 82ca8912 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx", 0x%"PRIx64 |
940 | 82ca8912 | Alex Williamson | ", %d)\n", __func__, vdev->host.domain, vdev->host.bus,
|
941 | 82ca8912 | Alex Williamson | vdev->host.slot, vdev->host.function, bar->nr, addr, |
942 | 82ca8912 | Alex Williamson | data, size); |
943 | 82ca8912 | Alex Williamson | } |
944 | 82ca8912 | Alex Williamson | #endif
|
945 | 65501a74 | Alex Williamson | |
946 | 65501a74 | Alex Williamson | /*
|
947 | 65501a74 | Alex Williamson | * A read or write to a BAR always signals an INTx EOI. This will
|
948 | 65501a74 | Alex Williamson | * do nothing if not pending (including not in INTx mode). We assume
|
949 | 65501a74 | Alex Williamson | * that a BAR access is in response to an interrupt and that BAR
|
950 | 65501a74 | Alex Williamson | * accesses will service the interrupt. Unfortunately, we don't know
|
951 | 65501a74 | Alex Williamson | * which access will service the interrupt, so we're potentially
|
952 | 65501a74 | Alex Williamson | * getting quite a few host interrupts per guest interrupt.
|
953 | 65501a74 | Alex Williamson | */
|
954 | 3a4f2816 | Jan Kiszka | vfio_eoi(container_of(bar, VFIODevice, bars[bar->nr])); |
955 | 65501a74 | Alex Williamson | } |
956 | 65501a74 | Alex Williamson | |
957 | 65501a74 | Alex Williamson | static uint64_t vfio_bar_read(void *opaque, |
958 | a8170e5e | Avi Kivity | hwaddr addr, unsigned size)
|
959 | 65501a74 | Alex Williamson | { |
960 | 65501a74 | Alex Williamson | VFIOBAR *bar = opaque; |
961 | 65501a74 | Alex Williamson | union {
|
962 | 65501a74 | Alex Williamson | uint8_t byte; |
963 | 65501a74 | Alex Williamson | uint16_t word; |
964 | 65501a74 | Alex Williamson | uint32_t dword; |
965 | 65501a74 | Alex Williamson | uint64_t qword; |
966 | 65501a74 | Alex Williamson | } buf; |
967 | 65501a74 | Alex Williamson | uint64_t data = 0;
|
968 | 65501a74 | Alex Williamson | |
969 | 65501a74 | Alex Williamson | if (pread(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
|
970 | 312fd5f2 | Markus Armbruster | error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m", |
971 | 65501a74 | Alex Williamson | __func__, addr, size); |
972 | 65501a74 | Alex Williamson | return (uint64_t)-1; |
973 | 65501a74 | Alex Williamson | } |
974 | 65501a74 | Alex Williamson | |
975 | 65501a74 | Alex Williamson | switch (size) {
|
976 | 65501a74 | Alex Williamson | case 1: |
977 | 65501a74 | Alex Williamson | data = buf.byte; |
978 | 65501a74 | Alex Williamson | break;
|
979 | 65501a74 | Alex Williamson | case 2: |
980 | 65501a74 | Alex Williamson | data = le16_to_cpu(buf.word); |
981 | 65501a74 | Alex Williamson | break;
|
982 | 65501a74 | Alex Williamson | case 4: |
983 | 65501a74 | Alex Williamson | data = le32_to_cpu(buf.dword); |
984 | 65501a74 | Alex Williamson | break;
|
985 | 65501a74 | Alex Williamson | default:
|
986 | 65501a74 | Alex Williamson | hw_error("vfio: unsupported read size, %d bytes\n", size);
|
987 | 65501a74 | Alex Williamson | break;
|
988 | 65501a74 | Alex Williamson | } |
989 | 65501a74 | Alex Williamson | |
990 | 82ca8912 | Alex Williamson | #ifdef DEBUG_VFIO
|
991 | 82ca8912 | Alex Williamson | { |
992 | 82ca8912 | Alex Williamson | VFIODevice *vdev = container_of(bar, VFIODevice, bars[bar->nr]); |
993 | 82ca8912 | Alex Williamson | |
994 | 82ca8912 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x:BAR%d+0x%"HWADDR_PRIx
|
995 | 82ca8912 | Alex Williamson | ", %d) = 0x%"PRIx64"\n", __func__, vdev->host.domain, |
996 | 82ca8912 | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function, |
997 | 82ca8912 | Alex Williamson | bar->nr, addr, size, data); |
998 | 82ca8912 | Alex Williamson | } |
999 | 82ca8912 | Alex Williamson | #endif
|
1000 | 65501a74 | Alex Williamson | |
1001 | 65501a74 | Alex Williamson | /* Same as write above */
|
1002 | 3a4f2816 | Jan Kiszka | vfio_eoi(container_of(bar, VFIODevice, bars[bar->nr])); |
1003 | 65501a74 | Alex Williamson | |
1004 | 65501a74 | Alex Williamson | return data;
|
1005 | 65501a74 | Alex Williamson | } |
1006 | 65501a74 | Alex Williamson | |
/* MemoryRegion callbacks for trapped BAR accesses; PCI is little endian. */
static const MemoryRegionOps vfio_bar_ops = {
    .read = vfio_bar_read,
    .write = vfio_bar_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1012 | 65501a74 | Alex Williamson | |
1013 | f15689c7 | Alex Williamson | static void vfio_vga_write(void *opaque, hwaddr addr, |
1014 | f15689c7 | Alex Williamson | uint64_t data, unsigned size)
|
1015 | f15689c7 | Alex Williamson | { |
1016 | f15689c7 | Alex Williamson | VFIOVGARegion *region = opaque; |
1017 | f15689c7 | Alex Williamson | VFIOVGA *vga = container_of(region, VFIOVGA, region[region->nr]); |
1018 | f15689c7 | Alex Williamson | union {
|
1019 | f15689c7 | Alex Williamson | uint8_t byte; |
1020 | f15689c7 | Alex Williamson | uint16_t word; |
1021 | f15689c7 | Alex Williamson | uint32_t dword; |
1022 | f15689c7 | Alex Williamson | uint64_t qword; |
1023 | f15689c7 | Alex Williamson | } buf; |
1024 | f15689c7 | Alex Williamson | off_t offset = vga->fd_offset + region->offset + addr; |
1025 | f15689c7 | Alex Williamson | |
1026 | f15689c7 | Alex Williamson | switch (size) {
|
1027 | f15689c7 | Alex Williamson | case 1: |
1028 | f15689c7 | Alex Williamson | buf.byte = data; |
1029 | f15689c7 | Alex Williamson | break;
|
1030 | f15689c7 | Alex Williamson | case 2: |
1031 | f15689c7 | Alex Williamson | buf.word = cpu_to_le16(data); |
1032 | f15689c7 | Alex Williamson | break;
|
1033 | f15689c7 | Alex Williamson | case 4: |
1034 | f15689c7 | Alex Williamson | buf.dword = cpu_to_le32(data); |
1035 | f15689c7 | Alex Williamson | break;
|
1036 | f15689c7 | Alex Williamson | default:
|
1037 | f15689c7 | Alex Williamson | hw_error("vfio: unsupported write size, %d bytes\n", size);
|
1038 | f15689c7 | Alex Williamson | break;
|
1039 | f15689c7 | Alex Williamson | } |
1040 | f15689c7 | Alex Williamson | |
1041 | f15689c7 | Alex Williamson | if (pwrite(vga->fd, &buf, size, offset) != size) {
|
1042 | f15689c7 | Alex Williamson | error_report("%s(,0x%"HWADDR_PRIx", 0x%"PRIx64", %d) failed: %m", |
1043 | f15689c7 | Alex Williamson | __func__, region->offset + addr, data, size); |
1044 | f15689c7 | Alex Williamson | } |
1045 | f15689c7 | Alex Williamson | |
1046 | f15689c7 | Alex Williamson | DPRINTF("%s(0x%"HWADDR_PRIx", 0x%"PRIx64", %d)\n", |
1047 | f15689c7 | Alex Williamson | __func__, region->offset + addr, data, size); |
1048 | f15689c7 | Alex Williamson | } |
1049 | f15689c7 | Alex Williamson | |
1050 | f15689c7 | Alex Williamson | static uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size) |
1051 | f15689c7 | Alex Williamson | { |
1052 | f15689c7 | Alex Williamson | VFIOVGARegion *region = opaque; |
1053 | f15689c7 | Alex Williamson | VFIOVGA *vga = container_of(region, VFIOVGA, region[region->nr]); |
1054 | f15689c7 | Alex Williamson | union {
|
1055 | f15689c7 | Alex Williamson | uint8_t byte; |
1056 | f15689c7 | Alex Williamson | uint16_t word; |
1057 | f15689c7 | Alex Williamson | uint32_t dword; |
1058 | f15689c7 | Alex Williamson | uint64_t qword; |
1059 | f15689c7 | Alex Williamson | } buf; |
1060 | f15689c7 | Alex Williamson | uint64_t data = 0;
|
1061 | f15689c7 | Alex Williamson | off_t offset = vga->fd_offset + region->offset + addr; |
1062 | f15689c7 | Alex Williamson | |
1063 | f15689c7 | Alex Williamson | if (pread(vga->fd, &buf, size, offset) != size) {
|
1064 | f15689c7 | Alex Williamson | error_report("%s(,0x%"HWADDR_PRIx", %d) failed: %m", |
1065 | f15689c7 | Alex Williamson | __func__, region->offset + addr, size); |
1066 | f15689c7 | Alex Williamson | return (uint64_t)-1; |
1067 | f15689c7 | Alex Williamson | } |
1068 | f15689c7 | Alex Williamson | |
1069 | f15689c7 | Alex Williamson | switch (size) {
|
1070 | f15689c7 | Alex Williamson | case 1: |
1071 | f15689c7 | Alex Williamson | data = buf.byte; |
1072 | f15689c7 | Alex Williamson | break;
|
1073 | f15689c7 | Alex Williamson | case 2: |
1074 | f15689c7 | Alex Williamson | data = le16_to_cpu(buf.word); |
1075 | f15689c7 | Alex Williamson | break;
|
1076 | f15689c7 | Alex Williamson | case 4: |
1077 | f15689c7 | Alex Williamson | data = le32_to_cpu(buf.dword); |
1078 | f15689c7 | Alex Williamson | break;
|
1079 | f15689c7 | Alex Williamson | default:
|
1080 | f15689c7 | Alex Williamson | hw_error("vfio: unsupported read size, %d bytes\n", size);
|
1081 | f15689c7 | Alex Williamson | break;
|
1082 | f15689c7 | Alex Williamson | } |
1083 | f15689c7 | Alex Williamson | |
1084 | f15689c7 | Alex Williamson | DPRINTF("%s(0x%"HWADDR_PRIx", %d) = 0x%"PRIx64"\n", |
1085 | f15689c7 | Alex Williamson | __func__, region->offset + addr, size, data); |
1086 | f15689c7 | Alex Williamson | |
1087 | f15689c7 | Alex Williamson | return data;
|
1088 | f15689c7 | Alex Williamson | } |
1089 | f15689c7 | Alex Williamson | |
/*
 * MemoryRegionOps for the VGA I/O and memory ranges of an assigned
 * device; accesses are forwarded to the host via vfio_vga_read/write.
 */
static const MemoryRegionOps vfio_vga_ops = {
    .read = vfio_vga_read,
    .write = vfio_vga_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1095 | f15689c7 | Alex Williamson | |
1096 | 65501a74 | Alex Williamson | /*
|
1097 | 7076eabc | Alex Williamson | * Device specific quirks
|
1098 | 7076eabc | Alex Williamson | */
|
1099 | 7076eabc | Alex Williamson | |
1100 | 7076eabc | Alex Williamson | #define PCI_VENDOR_ID_ATI 0x1002 |
1101 | 7076eabc | Alex Williamson | |
1102 | 7076eabc | Alex Williamson | /*
|
1103 | 7076eabc | Alex Williamson | * Device 1002:68f9 (Advanced Micro Devices [AMD] nee ATI Cedar PRO [Radeon
|
1104 | 7076eabc | Alex Williamson | * HD 5450/6350]) reports the upper byte of the physical address of the
|
1105 | 7076eabc | Alex Williamson | * I/O port BAR4 through VGA register 0x3c3. The BAR is 256 bytes, so the
|
1106 | 7076eabc | Alex Williamson | * lower byte is known to be zero. Probing for this quirk reads 0xff from
|
1107 | 7076eabc | Alex Williamson | * port 0x3c3 on some devices so we store the physical address and replace
|
1108 | 7076eabc | Alex Williamson | * reads with the virtual address any time it matches. XXX Research when
|
1109 | 7076eabc | Alex Williamson | * to enable quirk.
|
1110 | 7076eabc | Alex Williamson | */
|
1111 | 7076eabc | Alex Williamson | static uint64_t vfio_ati_3c3_quirk_read(void *opaque, |
1112 | 7076eabc | Alex Williamson | hwaddr addr, unsigned size)
|
1113 | 7076eabc | Alex Williamson | { |
1114 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1115 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1116 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1117 | 7076eabc | Alex Williamson | uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], |
1118 | 7076eabc | Alex Williamson | addr + 0x3, size);
|
1119 | 7076eabc | Alex Williamson | |
1120 | 7076eabc | Alex Williamson | if (data == quirk->data) {
|
1121 | 7076eabc | Alex Williamson | data = pci_get_byte(pdev->config + PCI_BASE_ADDRESS_4 + 1);
|
1122 | 7076eabc | Alex Williamson | DPRINTF("%s(0x3c3, 1) = 0x%"PRIx64"\n", __func__, data); |
1123 | 7076eabc | Alex Williamson | } |
1124 | 7076eabc | Alex Williamson | |
1125 | 7076eabc | Alex Williamson | return data;
|
1126 | 7076eabc | Alex Williamson | } |
1127 | 7076eabc | Alex Williamson | |
/*
 * Read-only ops for the ATI/AMD 0x3c3 quirk window; no .write is
 * provided since only reads of port 0x3c3 need interception.
 */
static const MemoryRegionOps vfio_ati_3c3_quirk = {
    .read = vfio_ati_3c3_quirk_read,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1132 | 7076eabc | Alex Williamson | |
1133 | 7076eabc | Alex Williamson | static void vfio_vga_probe_ati_3c3_quirk(VFIODevice *vdev) |
1134 | 7076eabc | Alex Williamson | { |
1135 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1136 | 7076eabc | Alex Williamson | off_t physoffset = vdev->config_offset + PCI_BASE_ADDRESS_4; |
1137 | 7076eabc | Alex Williamson | uint32_t physbar; |
1138 | 7076eabc | Alex Williamson | VFIOQuirk *quirk; |
1139 | 7076eabc | Alex Williamson | |
1140 | 7076eabc | Alex Williamson | if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI ||
|
1141 | 7076eabc | Alex Williamson | vdev->bars[4].size < 256) { |
1142 | 7076eabc | Alex Williamson | return;
|
1143 | 7076eabc | Alex Williamson | } |
1144 | 7076eabc | Alex Williamson | |
1145 | 7076eabc | Alex Williamson | /* Get I/O port BAR physical address */
|
1146 | 7076eabc | Alex Williamson | if (pread(vdev->fd, &physbar, 4, physoffset) != 4) { |
1147 | 7076eabc | Alex Williamson | error_report("vfio: probe failed for ATI/AMD 0x3c3 quirk on device "
|
1148 | 7076eabc | Alex Williamson | "%04x:%02x:%02x.%x", vdev->host.domain,
|
1149 | 7076eabc | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
1150 | 7076eabc | Alex Williamson | return;
|
1151 | 7076eabc | Alex Williamson | } |
1152 | 7076eabc | Alex Williamson | |
1153 | 7076eabc | Alex Williamson | quirk = g_malloc0(sizeof(*quirk));
|
1154 | 7076eabc | Alex Williamson | quirk->vdev = vdev; |
1155 | 7076eabc | Alex Williamson | quirk->data = (physbar >> 8) & 0xff; |
1156 | 7076eabc | Alex Williamson | |
1157 | 7076eabc | Alex Williamson | memory_region_init_io(&quirk->mem, &vfio_ati_3c3_quirk, quirk, |
1158 | 7076eabc | Alex Williamson | "vfio-ati-3c3-quirk", 1); |
1159 | 7076eabc | Alex Williamson | memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, 3,
|
1160 | 7076eabc | Alex Williamson | &quirk->mem); |
1161 | 7076eabc | Alex Williamson | |
1162 | 7076eabc | Alex Williamson | QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, |
1163 | 7076eabc | Alex Williamson | quirk, next); |
1164 | 7076eabc | Alex Williamson | |
1165 | 7076eabc | Alex Williamson | DPRINTF("Enabled ATI/AMD quirk 0x3c3 for device %04x:%02x:%02x.%x\n",
|
1166 | 7076eabc | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
1167 | 7076eabc | Alex Williamson | vdev->host.function); |
1168 | 7076eabc | Alex Williamson | } |
1169 | 7076eabc | Alex Williamson | |
1170 | 7076eabc | Alex Williamson | /*
|
1171 | 7076eabc | Alex Williamson | * Device 1002:68f9 (Advanced Micro Devices [AMD] nee ATI Cedar PRO [Radeon
|
1172 | 7076eabc | Alex Williamson | * HD 5450/6350]) reports the physical address of MMIO BAR0 through a
|
1173 | 7076eabc | Alex Williamson | * write/read operation on I/O port BAR4. When uint32_t 0x4010 is written
|
1174 | 7076eabc | Alex Williamson | * to offset 0x0, the subsequent read from offset 0x4 returns the contents
|
1175 | 7076eabc | Alex Williamson | * of BAR0. Test for this quirk on all ATI/AMD devices. XXX - Note that
|
 * 0x10 is the offset of BAR0 in config space, is this a window to all of
|
1177 | 7076eabc | Alex Williamson | * config space?
|
1178 | 7076eabc | Alex Williamson | */
|
1179 | 7076eabc | Alex Williamson | static uint64_t vfio_ati_4010_quirk_read(void *opaque, |
1180 | 7076eabc | Alex Williamson | hwaddr addr, unsigned size)
|
1181 | 7076eabc | Alex Williamson | { |
1182 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1183 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1184 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1185 | 7076eabc | Alex Williamson | uint64_t data = vfio_bar_read(&vdev->bars[4], addr, size);
|
1186 | 7076eabc | Alex Williamson | |
1187 | 7076eabc | Alex Williamson | if (addr == 4 && size == 4 && quirk->data) { |
1188 | 7076eabc | Alex Williamson | data = pci_get_long(pdev->config + PCI_BASE_ADDRESS_0); |
1189 | 7076eabc | Alex Williamson | DPRINTF("%s(BAR4+0x4) = 0x%"PRIx64"\n", __func__, data); |
1190 | 7076eabc | Alex Williamson | } |
1191 | 7076eabc | Alex Williamson | |
1192 | 7076eabc | Alex Williamson | quirk->data = 0;
|
1193 | 7076eabc | Alex Williamson | |
1194 | 7076eabc | Alex Williamson | return data;
|
1195 | 7076eabc | Alex Williamson | } |
1196 | 7076eabc | Alex Williamson | |
1197 | 7076eabc | Alex Williamson | static void vfio_ati_4010_quirk_write(void *opaque, hwaddr addr, |
1198 | 7076eabc | Alex Williamson | uint64_t data, unsigned size)
|
1199 | 7076eabc | Alex Williamson | { |
1200 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1201 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1202 | 7076eabc | Alex Williamson | |
1203 | 7076eabc | Alex Williamson | vfio_bar_write(&vdev->bars[4], addr, data, size);
|
1204 | 7076eabc | Alex Williamson | |
1205 | 7076eabc | Alex Williamson | quirk->data = (addr == 0 && size == 4 && data == 0x4010) ? 1 : 0; |
1206 | 7076eabc | Alex Williamson | } |
1207 | 7076eabc | Alex Williamson | |
/*
 * Ops for the ATI/AMD 0x4010 backdoor quirk window over BAR4.
 */
static const MemoryRegionOps vfio_ati_4010_quirk = {
    .read = vfio_ati_4010_quirk_read,
    .write = vfio_ati_4010_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1213 | 7076eabc | Alex Williamson | |
1214 | 7076eabc | Alex Williamson | static void vfio_probe_ati_4010_quirk(VFIODevice *vdev, int nr) |
1215 | 7076eabc | Alex Williamson | { |
1216 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1217 | 7076eabc | Alex Williamson | off_t physoffset = vdev->config_offset + PCI_BASE_ADDRESS_0; |
1218 | 7076eabc | Alex Williamson | uint32_t physbar0; |
1219 | 7076eabc | Alex Williamson | uint64_t data; |
1220 | 7076eabc | Alex Williamson | VFIOQuirk *quirk; |
1221 | 7076eabc | Alex Williamson | |
1222 | 7076eabc | Alex Williamson | if (!vdev->has_vga || nr != 4 || !vdev->bars[0].size || |
1223 | 7076eabc | Alex Williamson | pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { |
1224 | 7076eabc | Alex Williamson | return;
|
1225 | 7076eabc | Alex Williamson | } |
1226 | 7076eabc | Alex Williamson | |
1227 | 7076eabc | Alex Williamson | /* Get I/O port BAR physical address */
|
1228 | 7076eabc | Alex Williamson | if (pread(vdev->fd, &physbar0, 4, physoffset) != 4) { |
1229 | 7076eabc | Alex Williamson | error_report("vfio: probe failed for ATI/AMD 0x4010 quirk on device "
|
1230 | 7076eabc | Alex Williamson | "%04x:%02x:%02x.%x", vdev->host.domain,
|
1231 | 7076eabc | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
1232 | 7076eabc | Alex Williamson | return;
|
1233 | 7076eabc | Alex Williamson | } |
1234 | 7076eabc | Alex Williamson | |
1235 | 7076eabc | Alex Williamson | /* Write 0x4010 to I/O port BAR offset 0 */
|
1236 | 7076eabc | Alex Williamson | vfio_bar_write(&vdev->bars[4], 0, 0x4010, 4); |
1237 | 7076eabc | Alex Williamson | /* Read back result */
|
1238 | 7076eabc | Alex Williamson | data = vfio_bar_read(&vdev->bars[4], 4, 4); |
1239 | 7076eabc | Alex Williamson | |
1240 | 7076eabc | Alex Williamson | /* If the register matches the physical address of BAR0, we need a quirk */
|
1241 | 7076eabc | Alex Williamson | if (data != physbar0) {
|
1242 | 7076eabc | Alex Williamson | return;
|
1243 | 7076eabc | Alex Williamson | } |
1244 | 7076eabc | Alex Williamson | |
1245 | 7076eabc | Alex Williamson | quirk = g_malloc0(sizeof(*quirk));
|
1246 | 7076eabc | Alex Williamson | quirk->vdev = vdev; |
1247 | 7076eabc | Alex Williamson | |
1248 | 7076eabc | Alex Williamson | memory_region_init_io(&quirk->mem, &vfio_ati_4010_quirk, quirk, |
1249 | 7076eabc | Alex Williamson | "vfio-ati-4010-quirk", 8); |
1250 | 7076eabc | Alex Williamson | memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1); |
1251 | 7076eabc | Alex Williamson | |
1252 | 7076eabc | Alex Williamson | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); |
1253 | 7076eabc | Alex Williamson | |
1254 | 7076eabc | Alex Williamson | DPRINTF("Enabled ATI/AMD quirk 0x4010 for device %04x:%02x:%02x.%x\n",
|
1255 | 7076eabc | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
1256 | 7076eabc | Alex Williamson | vdev->host.function); |
1257 | 7076eabc | Alex Williamson | } |
1258 | 7076eabc | Alex Williamson | |
1259 | 7076eabc | Alex Williamson | /*
|
1260 | 7076eabc | Alex Williamson | * Device 1002:5b63 (Advanced Micro Devices [AMD] nee ATI RV370 [Radeon X550])
|
1261 | 7076eabc | Alex Williamson | * retrieves the upper half of the MMIO BAR0 physical address by writing
|
1262 | 7076eabc | Alex Williamson | * 0xf10 to I/O port BAR1 offset 0 and reading the result from offset 6.
|
1263 | 7076eabc | Alex Williamson | * XXX - 0x10 is the offset of BAR0 in PCI config space, this could provide
|
1264 | 7076eabc | Alex Williamson | * full access to config space. Config space is little endian, so the data
|
1265 | 7076eabc | Alex Williamson | * register probably starts at 0x4.
|
1266 | 7076eabc | Alex Williamson | */
|
1267 | 7076eabc | Alex Williamson | static uint64_t vfio_ati_f10_quirk_read(void *opaque, |
1268 | 7076eabc | Alex Williamson | hwaddr addr, unsigned size)
|
1269 | 7076eabc | Alex Williamson | { |
1270 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1271 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1272 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1273 | 7076eabc | Alex Williamson | uint64_t data = vfio_bar_read(&vdev->bars[1], addr, size);
|
1274 | 7076eabc | Alex Williamson | |
1275 | 7076eabc | Alex Williamson | if (addr == 6 && size == 2 && quirk->data) { |
1276 | 7076eabc | Alex Williamson | data = pci_get_word(pdev->config + PCI_BASE_ADDRESS_0 + 2);
|
1277 | 7076eabc | Alex Williamson | DPRINTF("%s(BAR1+0x6) = 0x%"PRIx64"\n", __func__, data); |
1278 | 7076eabc | Alex Williamson | } |
1279 | 7076eabc | Alex Williamson | |
1280 | 7076eabc | Alex Williamson | quirk->data = 0;
|
1281 | 7076eabc | Alex Williamson | |
1282 | 7076eabc | Alex Williamson | return data;
|
1283 | 7076eabc | Alex Williamson | } |
1284 | 7076eabc | Alex Williamson | |
1285 | 7076eabc | Alex Williamson | static void vfio_ati_f10_quirk_write(void *opaque, hwaddr addr, |
1286 | 7076eabc | Alex Williamson | uint64_t data, unsigned size)
|
1287 | 7076eabc | Alex Williamson | { |
1288 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1289 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1290 | 7076eabc | Alex Williamson | |
1291 | 7076eabc | Alex Williamson | vfio_bar_write(&vdev->bars[1], addr, data, size);
|
1292 | 7076eabc | Alex Williamson | |
1293 | 7076eabc | Alex Williamson | quirk->data = (addr == 0 && size == 4 && data == 0xf10) ? 1 : 0; |
1294 | 7076eabc | Alex Williamson | } |
1295 | 7076eabc | Alex Williamson | |
/*
 * Ops for the ATI/AMD 0xf10 backdoor quirk window over BAR1.
 */
static const MemoryRegionOps vfio_ati_f10_quirk = {
    .read = vfio_ati_f10_quirk_read,
    .write = vfio_ati_f10_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1301 | 7076eabc | Alex Williamson | |
1302 | 7076eabc | Alex Williamson | static void vfio_probe_ati_f10_quirk(VFIODevice *vdev, int nr) |
1303 | 7076eabc | Alex Williamson | { |
1304 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1305 | 7076eabc | Alex Williamson | off_t physoffset = vdev->config_offset + PCI_BASE_ADDRESS_0; |
1306 | 7076eabc | Alex Williamson | uint32_t physbar0; |
1307 | 7076eabc | Alex Williamson | uint64_t data; |
1308 | 7076eabc | Alex Williamson | VFIOQuirk *quirk; |
1309 | 7076eabc | Alex Williamson | |
1310 | 7076eabc | Alex Williamson | if (!vdev->has_vga || nr != 1 || !vdev->bars[0].size || |
1311 | 7076eabc | Alex Williamson | pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) { |
1312 | 7076eabc | Alex Williamson | return;
|
1313 | 7076eabc | Alex Williamson | } |
1314 | 7076eabc | Alex Williamson | |
1315 | 7076eabc | Alex Williamson | /* Get I/O port BAR physical address */
|
1316 | 7076eabc | Alex Williamson | if (pread(vdev->fd, &physbar0, 4, physoffset) != 4) { |
1317 | 7076eabc | Alex Williamson | error_report("vfio: probe failed for ATI/AMD 0xf10 quirk on device "
|
1318 | 7076eabc | Alex Williamson | "%04x:%02x:%02x.%x", vdev->host.domain,
|
1319 | 7076eabc | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
1320 | 7076eabc | Alex Williamson | return;
|
1321 | 7076eabc | Alex Williamson | } |
1322 | 7076eabc | Alex Williamson | |
1323 | 7076eabc | Alex Williamson | vfio_bar_write(&vdev->bars[1], 0, 0xf10, 4); |
1324 | 7076eabc | Alex Williamson | data = vfio_bar_read(&vdev->bars[1], 0x6, 2); |
1325 | 7076eabc | Alex Williamson | |
1326 | 7076eabc | Alex Williamson | /* If the register matches the physical address of BAR0, we need a quirk */
|
1327 | 7076eabc | Alex Williamson | if (data != (le32_to_cpu(physbar0) >> 16)) { |
1328 | 7076eabc | Alex Williamson | return;
|
1329 | 7076eabc | Alex Williamson | } |
1330 | 7076eabc | Alex Williamson | |
1331 | 7076eabc | Alex Williamson | quirk = g_malloc0(sizeof(*quirk));
|
1332 | 7076eabc | Alex Williamson | quirk->vdev = vdev; |
1333 | 7076eabc | Alex Williamson | |
1334 | 7076eabc | Alex Williamson | memory_region_init_io(&quirk->mem, &vfio_ati_f10_quirk, quirk, |
1335 | 7076eabc | Alex Williamson | "vfio-ati-f10-quirk", 8); |
1336 | 7076eabc | Alex Williamson | memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1); |
1337 | 7076eabc | Alex Williamson | |
1338 | 7076eabc | Alex Williamson | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); |
1339 | 7076eabc | Alex Williamson | |
1340 | 7076eabc | Alex Williamson | DPRINTF("Enabled ATI/AMD quirk 0xf10 for device %04x:%02x:%02x.%x\n",
|
1341 | 7076eabc | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
1342 | 7076eabc | Alex Williamson | vdev->host.function); |
1343 | 7076eabc | Alex Williamson | } |
1344 | 7076eabc | Alex Williamson | |
1345 | 7076eabc | Alex Williamson | #define PCI_VENDOR_ID_NVIDIA 0x10de |
1346 | 7076eabc | Alex Williamson | |
1347 | 7076eabc | Alex Williamson | /*
|
1348 | 7076eabc | Alex Williamson | * Nvidia has several different methods to get to config space, the
|
 * nouveau project has several of these documented here:
|
1350 | 7076eabc | Alex Williamson | * https://github.com/pathscale/envytools/tree/master/hwdocs
|
1351 | 7076eabc | Alex Williamson | *
|
1352 | 7076eabc | Alex Williamson | * The first quirk is actually not documented in envytools and is found
|
1353 | 7076eabc | Alex Williamson | * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an
|
1354 | 7076eabc | Alex Williamson | * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access
|
1355 | 7076eabc | Alex Williamson | * the mirror of PCI config space found at BAR0 offset 0x1800. The access
|
1356 | 7076eabc | Alex Williamson | * sequence first writes 0x338 to I/O port 0x3d4. The target offset is
|
1357 | 7076eabc | Alex Williamson | * then written to 0x3d0. Finally 0x538 is written for a read and 0x738
|
1358 | 7076eabc | Alex Williamson | * is written for a write to 0x3d4. The BAR0 offset is then accessible
|
1359 | 7076eabc | Alex Williamson | * through 0x3d0. This quirk doesn't seem to be necessary on newer cards
|
1360 | 7076eabc | Alex Williamson | * that use the I/O port BAR5 window but it doesn't hurt to leave it.
|
1361 | 7076eabc | Alex Williamson | */
|
/*
 * States of the NVIDIA 0x3d0 backdoor sequence, tracked in quirk->data
 * (see vfio_nvidia_3d0_quirk_write for the transitions).
 */
enum {
    NV_3D0_NONE,    /* no backdoor access in progress */
    NV_3D0_SELECT,  /* 0x338 was written to 0x3d4 */
    NV_3D0_WINDOW,  /* a 0x1800-window offset was written to 0x3d0 */
    NV_3D0_READ,    /* 0x538 armed the next 0x3d0 read for config space */
    NV_3D0_WRITE,   /* 0x738 armed the next 0x3d0 write for config space */
};
1369 | 7076eabc | Alex Williamson | |
1370 | 7076eabc | Alex Williamson | static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, |
1371 | 7076eabc | Alex Williamson | hwaddr addr, unsigned size)
|
1372 | 7076eabc | Alex Williamson | { |
1373 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1374 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1375 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1376 | 7076eabc | Alex Williamson | uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI], |
1377 | 7076eabc | Alex Williamson | addr + 0x10, size);
|
1378 | 7076eabc | Alex Williamson | |
1379 | 7076eabc | Alex Williamson | if (quirk->data == NV_3D0_READ && addr == 0) { |
1380 | 7076eabc | Alex Williamson | data = vfio_pci_read_config(pdev, quirk->data2, size); |
1381 | 7076eabc | Alex Williamson | DPRINTF("%s(0x3d0, %d) = 0x%"PRIx64"\n", __func__, size, data); |
1382 | 7076eabc | Alex Williamson | } |
1383 | 7076eabc | Alex Williamson | |
1384 | 7076eabc | Alex Williamson | quirk->data = NV_3D0_NONE; |
1385 | 7076eabc | Alex Williamson | |
1386 | 7076eabc | Alex Williamson | return data;
|
1387 | 7076eabc | Alex Williamson | } |
1388 | 7076eabc | Alex Williamson | |
/*
 * Write side of the NVIDIA 0x3d0 backdoor state machine.  This window
 * is mapped at offset 0x10 of the high VGA I/O range, so addr 0 is
 * port 0x3d0 and addr 4 is port 0x3d4.  The arming sequence (per the
 * comment above): 0x338 -> 0x3d4, config offset (0x1800 | off) ->
 * 0x3d0, then 0x538 (read) or 0x738 (write) -> 0x3d4.  Unless a config
 * write is intercepted, every access also reaches the physical port.
 */
static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
                                        uint64_t data, unsigned size)
{
    VFIOQuirk *quirk = opaque;
    VFIODevice *vdev = quirk->vdev;
    PCIDevice *pdev = &vdev->pdev;

    switch (quirk->data) {
    case NV_3D0_NONE:
        /* Arm on the 0x338 magic written to 0x3d4 */
        if (addr == 4 && data == 0x338) {
            quirk->data = NV_3D0_SELECT;
        }
        break;
    case NV_3D0_SELECT:
        quirk->data = NV_3D0_NONE;
        /* Expect a config-space offset within the 0x1800 window on 0x3d0 */
        if (addr == 0 && (data & ~0xff) == 0x1800) {
            quirk->data = NV_3D0_WINDOW;
            quirk->data2 = data & 0xff;  /* save the config offset */
        }
        break;
    case NV_3D0_WINDOW:
        quirk->data = NV_3D0_NONE;
        if (addr == 4) {
            if (data == 0x538) {
                quirk->data = NV_3D0_READ;   /* next 0x3d0 read is config */
            } else if (data == 0x738) {
                quirk->data = NV_3D0_WRITE;  /* next 0x3d0 write is config */
            }
        }
        break;
    case NV_3D0_WRITE:
        quirk->data = NV_3D0_NONE;
        if (addr == 0) {
            /* Redirect to emulated config space; skip the hardware port */
            vfio_pci_write_config(pdev, quirk->data2, data, size);
            DPRINTF("%s(0x3d0, 0x%"PRIx64", %d)\n", __func__, data, size);
            return;
        }
        break;
    default:
        quirk->data = NV_3D0_NONE;
    }

    /* Everything else passes through to the physical VGA port */
    vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
                   addr + 0x10, data, size);
}
1434 | 7076eabc | Alex Williamson | |
/*
 * Ops for the NVIDIA 0x3d0/0x3d4 backdoor quirk window.
 */
static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
    .read = vfio_nvidia_3d0_quirk_read,
    .write = vfio_nvidia_3d0_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1440 | 7076eabc | Alex Williamson | |
1441 | 7076eabc | Alex Williamson | static void vfio_vga_probe_nvidia_3d0_quirk(VFIODevice *vdev) |
1442 | 7076eabc | Alex Williamson | { |
1443 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1444 | 7076eabc | Alex Williamson | VFIOQuirk *quirk; |
1445 | 7076eabc | Alex Williamson | |
1446 | 7076eabc | Alex Williamson | if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA ||
|
1447 | 7076eabc | Alex Williamson | !vdev->bars[1].size) {
|
1448 | 7076eabc | Alex Williamson | return;
|
1449 | 7076eabc | Alex Williamson | } |
1450 | 7076eabc | Alex Williamson | |
1451 | 7076eabc | Alex Williamson | quirk = g_malloc0(sizeof(*quirk));
|
1452 | 7076eabc | Alex Williamson | quirk->vdev = vdev; |
1453 | 7076eabc | Alex Williamson | |
1454 | 7076eabc | Alex Williamson | memory_region_init_io(&quirk->mem, &vfio_nvidia_3d0_quirk, quirk, |
1455 | 7076eabc | Alex Williamson | "vfio-nvidia-3d0-quirk", 6); |
1456 | 7076eabc | Alex Williamson | memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, |
1457 | 7076eabc | Alex Williamson | 0x10, &quirk->mem);
|
1458 | 7076eabc | Alex Williamson | |
1459 | 7076eabc | Alex Williamson | QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks, |
1460 | 7076eabc | Alex Williamson | quirk, next); |
1461 | 7076eabc | Alex Williamson | |
1462 | 7076eabc | Alex Williamson | DPRINTF("Enabled NVIDIA VGA 0x3d0 quirk for device %04x:%02x:%02x.%x\n",
|
1463 | 7076eabc | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
1464 | 7076eabc | Alex Williamson | vdev->host.function); |
1465 | 7076eabc | Alex Williamson | } |
1466 | 7076eabc | Alex Williamson | |
1467 | 7076eabc | Alex Williamson | /*
|
1468 | 7076eabc | Alex Williamson | * The second quirk is documented in envytools. The I/O port BAR5 is just
|
1469 | 7076eabc | Alex Williamson | * a set of address/data ports to the MMIO BARs. The BAR we care about is
|
1470 | 7076eabc | Alex Williamson | * again BAR0. This backdoor is apparently a bit newer than the one above
|
1471 | 7076eabc | Alex Williamson | * so we need to not only trap 256 bytes @0x1800, but all of PCI config
|
1472 | 7076eabc | Alex Williamson | * space, including extended space is available at the 4k @0x88000.
|
1473 | 7076eabc | Alex Williamson | */
|
/*
 * Flag bits tracked in quirk->data for the NVIDIA BAR5 window backdoor;
 * the data port is redirected only when all of NV_BAR5_VALID are set.
 */
enum {
    NV_BAR5_ADDRESS = 0x1,  /* address register holds a config-window offset */
    NV_BAR5_ENABLE = 0x2,   /* enable bit written at BAR5 offset 0x4 */
    NV_BAR5_MASTER = 0x4,   /* master bit written at BAR5 offset 0x0 */
    NV_BAR5_VALID = 0x7,    /* all of the above */
};
1480 | 7076eabc | Alex Williamson | |
1481 | 7076eabc | Alex Williamson | static uint64_t vfio_nvidia_bar5_window_quirk_read(void *opaque, |
1482 | 7076eabc | Alex Williamson | hwaddr addr, unsigned size)
|
1483 | 7076eabc | Alex Williamson | { |
1484 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1485 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1486 | 7076eabc | Alex Williamson | uint64_t data = vfio_bar_read(&vdev->bars[5], addr, size);
|
1487 | 7076eabc | Alex Williamson | |
1488 | 7076eabc | Alex Williamson | if (addr == 0xc && quirk->data == NV_BAR5_VALID) { |
1489 | 7076eabc | Alex Williamson | data = vfio_pci_read_config(&vdev->pdev, quirk->data2, size); |
1490 | 7076eabc | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x:BAR5+0x%"HWADDR_PRIx", %d) = 0x%" |
1491 | 7076eabc | Alex Williamson | PRIx64"\n", __func__, vdev->host.domain, vdev->host.bus,
|
1492 | 7076eabc | Alex Williamson | vdev->host.slot, vdev->host.function, addr, size, data); |
1493 | 7076eabc | Alex Williamson | } |
1494 | 7076eabc | Alex Williamson | |
1495 | 7076eabc | Alex Williamson | return data;
|
1496 | 7076eabc | Alex Williamson | } |
1497 | 7076eabc | Alex Williamson | |
/*
 * Write side of the NVIDIA BAR5 window backdoor.  BAR5 is an
 * address/data port pair into the MMIO BARs: offset 0x0 carries a
 * master bit, 0x4 an enable bit, 0x8 the target address and 0xc the
 * data.  When the address selects the PCI config mirror (4k @0x88000
 * or 256 bytes @0x1800 in BAR0), fully-armed data-port writes are
 * redirected to emulated config space; everything else passes through
 * to the device.
 */
static void vfio_nvidia_bar5_window_quirk_write(void *opaque, hwaddr addr,
                                                uint64_t data, unsigned size)
{
    VFIOQuirk *quirk = opaque;
    VFIODevice *vdev = quirk->vdev;

    /*
     * Use quirk->data to track enables and quirk->data2 for the offset
     */
    switch (addr) {
    case 0x0:
        if (data & 0x1) {
            quirk->data |= NV_BAR5_MASTER;
        } else {
            quirk->data &= ~NV_BAR5_MASTER;
        }
        break;
    case 0x4:
        if (data & 0x1) {
            quirk->data |= NV_BAR5_ENABLE;
        } else {
            quirk->data &= ~NV_BAR5_ENABLE;
        }
        break;
    case 0x8:
        /* Address register: only tracked while the master bit is set */
        if (quirk->data & NV_BAR5_MASTER) {
            if ((data & ~0xfff) == 0x88000) {
                /* Extended config window: save offset within the 4k mirror */
                quirk->data |= NV_BAR5_ADDRESS;
                quirk->data2 = data & 0xfff;
            } else if ((data & ~0xff) == 0x1800) {
                /* Legacy 256-byte config window */
                quirk->data |= NV_BAR5_ADDRESS;
                quirk->data2 = data & 0xff;
            } else {
                quirk->data &= ~NV_BAR5_ADDRESS;
            }
        }
        break;
    case 0xc:
        if (quirk->data == NV_BAR5_VALID) {
            /* Fully armed: redirect to emulated config, skip hardware */
            vfio_pci_write_config(&vdev->pdev, quirk->data2, data, size);
            DPRINTF("%s(%04x:%02x:%02x.%x:BAR5+0x%"HWADDR_PRIx", 0x%"
                    PRIx64", %d)\n", __func__, vdev->host.domain,
                    vdev->host.bus, vdev->host.slot, vdev->host.function,
                    addr, data, size);
            return;
        }
        /* otherwise fall out of the switch and pass the write through */
    }

    vfio_bar_write(&vdev->bars[5], addr, data, size);
}
1548 | 7076eabc | Alex Williamson | |
/*
 * MemoryRegionOps backing the 16-byte BAR5 window quirk region; accesses
 * narrower than 4 bytes are rejected by the core memory layer.
 */
static const MemoryRegionOps vfio_nvidia_bar5_window_quirk = {
    .read = vfio_nvidia_bar5_window_quirk_read,
    .write = vfio_nvidia_bar5_window_quirk_write,
    .valid.min_access_size = 4,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1555 | 7076eabc | Alex Williamson | |
1556 | 7076eabc | Alex Williamson | static void vfio_probe_nvidia_bar5_window_quirk(VFIODevice *vdev, int nr) |
1557 | 7076eabc | Alex Williamson | { |
1558 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1559 | 7076eabc | Alex Williamson | VFIOQuirk *quirk; |
1560 | 7076eabc | Alex Williamson | |
1561 | 7076eabc | Alex Williamson | if (!vdev->has_vga || nr != 5 || |
1562 | 7076eabc | Alex Williamson | pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { |
1563 | 7076eabc | Alex Williamson | return;
|
1564 | 7076eabc | Alex Williamson | } |
1565 | 7076eabc | Alex Williamson | |
1566 | 7076eabc | Alex Williamson | quirk = g_malloc0(sizeof(*quirk));
|
1567 | 7076eabc | Alex Williamson | quirk->vdev = vdev; |
1568 | 7076eabc | Alex Williamson | |
1569 | 7076eabc | Alex Williamson | memory_region_init_io(&quirk->mem, &vfio_nvidia_bar5_window_quirk, quirk, |
1570 | 7076eabc | Alex Williamson | "vfio-nvidia-bar5-window-quirk", 16); |
1571 | 7076eabc | Alex Williamson | memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1); |
1572 | 7076eabc | Alex Williamson | |
1573 | 7076eabc | Alex Williamson | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); |
1574 | 7076eabc | Alex Williamson | |
1575 | 7076eabc | Alex Williamson | DPRINTF("Enabled NVIDIA BAR5 window quirk for device %04x:%02x:%02x.%x\n",
|
1576 | 7076eabc | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
1577 | 7076eabc | Alex Williamson | vdev->host.function); |
1578 | 7076eabc | Alex Williamson | } |
1579 | 7076eabc | Alex Williamson | |
/*
 * Finally, BAR0 itself.  We want to redirect any accesses to either
 * 0x1800 or 0x88000 through the PCI config space access functions.
 *
 * NB - quirks need to be page granularity or else they don't seem to
 * work when BARs are mmap'd.
 *
 * Here's offset 0x88000...
 */
|
1589 | 7076eabc | Alex Williamson | static uint64_t vfio_nvidia_bar0_88000_quirk_read(void *opaque, |
1590 | 7076eabc | Alex Williamson | hwaddr addr, unsigned size)
|
1591 | 7076eabc | Alex Williamson | { |
1592 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1593 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1594 | 7076eabc | Alex Williamson | hwaddr base = 0x88000 & TARGET_PAGE_MASK;
|
1595 | 7076eabc | Alex Williamson | hwaddr offset = 0x88000 & ~TARGET_PAGE_MASK;
|
1596 | 7076eabc | Alex Williamson | uint64_t data = vfio_bar_read(&vdev->bars[0], addr + base, size);
|
1597 | 7076eabc | Alex Williamson | |
1598 | 7076eabc | Alex Williamson | if (ranges_overlap(addr, size, offset, PCI_CONFIG_SPACE_SIZE)) {
|
1599 | 7076eabc | Alex Williamson | data = vfio_pci_read_config(&vdev->pdev, addr - offset, size); |
1600 | 7076eabc | Alex Williamson | |
1601 | 7076eabc | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x:BAR0+0x%"HWADDR_PRIx", %d) = 0x%" |
1602 | 7076eabc | Alex Williamson | PRIx64"\n", __func__, vdev->host.domain, vdev->host.bus,
|
1603 | 7076eabc | Alex Williamson | vdev->host.slot, vdev->host.function, addr + base, size, data); |
1604 | 7076eabc | Alex Williamson | } |
1605 | 7076eabc | Alex Williamson | |
1606 | 7076eabc | Alex Williamson | return data;
|
1607 | 7076eabc | Alex Williamson | } |
1608 | 7076eabc | Alex Williamson | |
1609 | 7076eabc | Alex Williamson | static void vfio_nvidia_bar0_88000_quirk_write(void *opaque, hwaddr addr, |
1610 | 7076eabc | Alex Williamson | uint64_t data, unsigned size)
|
1611 | 7076eabc | Alex Williamson | { |
1612 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1613 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1614 | 7076eabc | Alex Williamson | hwaddr base = 0x88000 & TARGET_PAGE_MASK;
|
1615 | 7076eabc | Alex Williamson | hwaddr offset = 0x88000 & ~TARGET_PAGE_MASK;
|
1616 | 7076eabc | Alex Williamson | |
1617 | 7076eabc | Alex Williamson | if (ranges_overlap(addr, size, offset, PCI_CONFIG_SPACE_SIZE)) {
|
1618 | 7076eabc | Alex Williamson | vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); |
1619 | 7076eabc | Alex Williamson | |
1620 | 7076eabc | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x:BAR0+0x%"HWADDR_PRIx", 0x%" |
1621 | 7076eabc | Alex Williamson | PRIx64", %d)\n", __func__, vdev->host.domain, vdev->host.bus,
|
1622 | 7076eabc | Alex Williamson | vdev->host.slot, vdev->host.function, addr + base, data, size); |
1623 | 7076eabc | Alex Williamson | } else {
|
1624 | 7076eabc | Alex Williamson | vfio_bar_write(&vdev->bars[0], addr + base, data, size);
|
1625 | 7076eabc | Alex Williamson | } |
1626 | 7076eabc | Alex Williamson | } |
1627 | 7076eabc | Alex Williamson | |
/* MemoryRegionOps for the page covering the BAR0 0x88000 config mirror */
static const MemoryRegionOps vfio_nvidia_bar0_88000_quirk = {
    .read = vfio_nvidia_bar0_88000_quirk_read,
    .write = vfio_nvidia_bar0_88000_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1633 | 7076eabc | Alex Williamson | |
1634 | 7076eabc | Alex Williamson | static void vfio_probe_nvidia_bar0_88000_quirk(VFIODevice *vdev, int nr) |
1635 | 7076eabc | Alex Williamson | { |
1636 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1637 | 7076eabc | Alex Williamson | VFIOQuirk *quirk; |
1638 | 7076eabc | Alex Williamson | |
1639 | 7076eabc | Alex Williamson | if (!vdev->has_vga || nr != 0 || |
1640 | 7076eabc | Alex Williamson | pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { |
1641 | 7076eabc | Alex Williamson | return;
|
1642 | 7076eabc | Alex Williamson | } |
1643 | 7076eabc | Alex Williamson | |
1644 | 7076eabc | Alex Williamson | quirk = g_malloc0(sizeof(*quirk));
|
1645 | 7076eabc | Alex Williamson | quirk->vdev = vdev; |
1646 | 7076eabc | Alex Williamson | |
1647 | 7076eabc | Alex Williamson | memory_region_init_io(&quirk->mem, &vfio_nvidia_bar0_88000_quirk, quirk, |
1648 | 7076eabc | Alex Williamson | "vfio-nvidia-bar0-88000-quirk",
|
1649 | 7076eabc | Alex Williamson | TARGET_PAGE_ALIGN(PCIE_CONFIG_SPACE_SIZE)); |
1650 | 7076eabc | Alex Williamson | memory_region_add_subregion_overlap(&vdev->bars[nr].mem, |
1651 | 7076eabc | Alex Williamson | 0x88000 & TARGET_PAGE_MASK,
|
1652 | 7076eabc | Alex Williamson | &quirk->mem, 1);
|
1653 | 7076eabc | Alex Williamson | |
1654 | 7076eabc | Alex Williamson | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); |
1655 | 7076eabc | Alex Williamson | |
1656 | 7076eabc | Alex Williamson | DPRINTF("Enabled NVIDIA BAR0 0x88000 quirk for device %04x:%02x:%02x.%x\n",
|
1657 | 7076eabc | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
1658 | 7076eabc | Alex Williamson | vdev->host.function); |
1659 | 7076eabc | Alex Williamson | } |
1660 | 7076eabc | Alex Williamson | |
1661 | 7076eabc | Alex Williamson | /*
|
1662 | 7076eabc | Alex Williamson | * And here's the same for BAR0 offset 0x1800...
|
1663 | 7076eabc | Alex Williamson | */
|
1664 | 7076eabc | Alex Williamson | static uint64_t vfio_nvidia_bar0_1800_quirk_read(void *opaque, |
1665 | 7076eabc | Alex Williamson | hwaddr addr, unsigned size)
|
1666 | 7076eabc | Alex Williamson | { |
1667 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1668 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1669 | 7076eabc | Alex Williamson | hwaddr base = 0x1800 & TARGET_PAGE_MASK;
|
1670 | 7076eabc | Alex Williamson | hwaddr offset = 0x1800 & ~TARGET_PAGE_MASK;
|
1671 | 7076eabc | Alex Williamson | uint64_t data = vfio_bar_read(&vdev->bars[0], addr + base, size);
|
1672 | 7076eabc | Alex Williamson | |
1673 | 7076eabc | Alex Williamson | if (ranges_overlap(addr, size, offset, PCI_CONFIG_SPACE_SIZE)) {
|
1674 | 7076eabc | Alex Williamson | data = vfio_pci_read_config(&vdev->pdev, addr - offset, size); |
1675 | 7076eabc | Alex Williamson | |
1676 | 7076eabc | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x:BAR0+0x%"HWADDR_PRIx", %d) = 0x%" |
1677 | 7076eabc | Alex Williamson | PRIx64"\n", __func__, vdev->host.domain, vdev->host.bus,
|
1678 | 7076eabc | Alex Williamson | vdev->host.slot, vdev->host.function, addr + base, size, data); |
1679 | 7076eabc | Alex Williamson | } |
1680 | 7076eabc | Alex Williamson | |
1681 | 7076eabc | Alex Williamson | return data;
|
1682 | 7076eabc | Alex Williamson | } |
1683 | 7076eabc | Alex Williamson | |
1684 | 7076eabc | Alex Williamson | static void vfio_nvidia_bar0_1800_quirk_write(void *opaque, hwaddr addr, |
1685 | 7076eabc | Alex Williamson | uint64_t data, unsigned size)
|
1686 | 7076eabc | Alex Williamson | { |
1687 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = opaque; |
1688 | 7076eabc | Alex Williamson | VFIODevice *vdev = quirk->vdev; |
1689 | 7076eabc | Alex Williamson | hwaddr base = 0x1800 & TARGET_PAGE_MASK;
|
1690 | 7076eabc | Alex Williamson | hwaddr offset = 0x1800 & ~TARGET_PAGE_MASK;
|
1691 | 7076eabc | Alex Williamson | |
1692 | 7076eabc | Alex Williamson | if (ranges_overlap(addr, size, offset, PCI_CONFIG_SPACE_SIZE)) {
|
1693 | 7076eabc | Alex Williamson | vfio_pci_write_config(&vdev->pdev, addr - offset, data, size); |
1694 | 7076eabc | Alex Williamson | |
1695 | 7076eabc | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x:BAR0+0x%"HWADDR_PRIx", 0x%" |
1696 | 7076eabc | Alex Williamson | PRIx64", %d)\n", __func__, vdev->host.domain, vdev->host.bus,
|
1697 | 7076eabc | Alex Williamson | vdev->host.slot, vdev->host.function, addr + base, data, size); |
1698 | 7076eabc | Alex Williamson | } else {
|
1699 | 7076eabc | Alex Williamson | vfio_bar_write(&vdev->bars[0], addr + base, data, size);
|
1700 | 7076eabc | Alex Williamson | } |
1701 | 7076eabc | Alex Williamson | } |
1702 | 7076eabc | Alex Williamson | |
/* MemoryRegionOps for the page covering the BAR0 0x1800 config mirror */
static const MemoryRegionOps vfio_nvidia_bar0_1800_quirk = {
    .read = vfio_nvidia_bar0_1800_quirk_read,
    .write = vfio_nvidia_bar0_1800_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1708 | 7076eabc | Alex Williamson | |
1709 | 7076eabc | Alex Williamson | static void vfio_probe_nvidia_bar0_1800_quirk(VFIODevice *vdev, int nr) |
1710 | 7076eabc | Alex Williamson | { |
1711 | 7076eabc | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
1712 | 7076eabc | Alex Williamson | VFIOQuirk *quirk; |
1713 | 7076eabc | Alex Williamson | |
1714 | 7076eabc | Alex Williamson | if (!vdev->has_vga || nr != 0 || |
1715 | 7076eabc | Alex Williamson | pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) { |
1716 | 7076eabc | Alex Williamson | return;
|
1717 | 7076eabc | Alex Williamson | } |
1718 | 7076eabc | Alex Williamson | |
1719 | 7076eabc | Alex Williamson | /* Log the chipset ID */
|
1720 | 7076eabc | Alex Williamson | DPRINTF("Nvidia NV%02x\n",
|
1721 | 7076eabc | Alex Williamson | (unsigned int)(vfio_bar_read(&vdev->bars[0], 0, 4) >> 20) & 0xff); |
1722 | 7076eabc | Alex Williamson | |
1723 | 7076eabc | Alex Williamson | quirk = g_malloc0(sizeof(*quirk));
|
1724 | 7076eabc | Alex Williamson | quirk->vdev = vdev; |
1725 | 7076eabc | Alex Williamson | |
1726 | 7076eabc | Alex Williamson | memory_region_init_io(&quirk->mem, &vfio_nvidia_bar0_1800_quirk, quirk, |
1727 | 7076eabc | Alex Williamson | "vfio-nvidia-bar0-1800-quirk",
|
1728 | 7076eabc | Alex Williamson | TARGET_PAGE_ALIGN(PCI_CONFIG_SPACE_SIZE)); |
1729 | 7076eabc | Alex Williamson | memory_region_add_subregion_overlap(&vdev->bars[nr].mem, |
1730 | 7076eabc | Alex Williamson | 0x1800 & TARGET_PAGE_MASK,
|
1731 | 7076eabc | Alex Williamson | &quirk->mem, 1);
|
1732 | 7076eabc | Alex Williamson | |
1733 | 7076eabc | Alex Williamson | QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); |
1734 | 7076eabc | Alex Williamson | |
1735 | 7076eabc | Alex Williamson | DPRINTF("Enabled NVIDIA BAR0 0x1800 quirk for device %04x:%02x:%02x.%x\n",
|
1736 | 7076eabc | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
1737 | 7076eabc | Alex Williamson | vdev->host.function); |
1738 | 7076eabc | Alex Williamson | } |
1739 | 7076eabc | Alex Williamson | |
1740 | 7076eabc | Alex Williamson | /*
|
1741 | 7076eabc | Alex Williamson | * TODO - Some Nvidia devices provide config access to their companion HDA
|
1742 | 7076eabc | Alex Williamson | * device and even to their parent bridge via these config space mirrors.
|
1743 | 7076eabc | Alex Williamson | * Add quirks for those regions.
|
1744 | 7076eabc | Alex Williamson | */
|
1745 | 7076eabc | Alex Williamson | |
1746 | 7076eabc | Alex Williamson | /*
|
1747 | 7076eabc | Alex Williamson | * Common quirk probe entry points.
|
1748 | 7076eabc | Alex Williamson | */
|
/* Probe for all known quirks living in VGA I/O space */
static void vfio_vga_quirk_setup(VFIODevice *vdev)
{
    vfio_vga_probe_ati_3c3_quirk(vdev);
    vfio_vga_probe_nvidia_3d0_quirk(vdev);
}
1754 | 7076eabc | Alex Williamson | |
1755 | 7076eabc | Alex Williamson | static void vfio_vga_quirk_teardown(VFIODevice *vdev) |
1756 | 7076eabc | Alex Williamson | { |
1757 | 7076eabc | Alex Williamson | int i;
|
1758 | 7076eabc | Alex Williamson | |
1759 | 7076eabc | Alex Williamson | for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) { |
1760 | 7076eabc | Alex Williamson | while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
|
1761 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks); |
1762 | 7076eabc | Alex Williamson | memory_region_del_subregion(&vdev->vga.region[i].mem, &quirk->mem); |
1763 | 7076eabc | Alex Williamson | QLIST_REMOVE(quirk, next); |
1764 | 7076eabc | Alex Williamson | g_free(quirk); |
1765 | 7076eabc | Alex Williamson | } |
1766 | 7076eabc | Alex Williamson | } |
1767 | 7076eabc | Alex Williamson | } |
1768 | 7076eabc | Alex Williamson | |
/*
 * Probe BAR @nr for all known device-specific quirks; each probe function
 * decides for itself whether it applies and is otherwise a no-op.
 */
static void vfio_bar_quirk_setup(VFIODevice *vdev, int nr)
{
    vfio_probe_ati_4010_quirk(vdev, nr);
    vfio_probe_ati_f10_quirk(vdev, nr);
    vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
}
1777 | 7076eabc | Alex Williamson | |
1778 | 7076eabc | Alex Williamson | static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr) |
1779 | 7076eabc | Alex Williamson | { |
1780 | 7076eabc | Alex Williamson | VFIOBAR *bar = &vdev->bars[nr]; |
1781 | 7076eabc | Alex Williamson | |
1782 | 7076eabc | Alex Williamson | while (!QLIST_EMPTY(&bar->quirks)) {
|
1783 | 7076eabc | Alex Williamson | VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks); |
1784 | 7076eabc | Alex Williamson | memory_region_del_subregion(&bar->mem, &quirk->mem); |
1785 | 7076eabc | Alex Williamson | QLIST_REMOVE(quirk, next); |
1786 | 7076eabc | Alex Williamson | g_free(quirk); |
1787 | 7076eabc | Alex Williamson | } |
1788 | 7076eabc | Alex Williamson | } |
1789 | 7076eabc | Alex Williamson | |
1790 | 7076eabc | Alex Williamson | /*
|
1791 | 65501a74 | Alex Williamson | * PCI config space
|
1792 | 65501a74 | Alex Williamson | */
|
1793 | 65501a74 | Alex Williamson | static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len) |
1794 | 65501a74 | Alex Williamson | { |
1795 | 65501a74 | Alex Williamson | VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); |
1796 | 4b5d5e87 | Alex Williamson | uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; |
1797 | 65501a74 | Alex Williamson | |
1798 | 4b5d5e87 | Alex Williamson | memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); |
1799 | 4b5d5e87 | Alex Williamson | emu_bits = le32_to_cpu(emu_bits); |
1800 | 65501a74 | Alex Williamson | |
1801 | 4b5d5e87 | Alex Williamson | if (emu_bits) {
|
1802 | 4b5d5e87 | Alex Williamson | emu_val = pci_default_read_config(pdev, addr, len); |
1803 | 4b5d5e87 | Alex Williamson | } |
1804 | 4b5d5e87 | Alex Williamson | |
1805 | 4b5d5e87 | Alex Williamson | if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { |
1806 | 4b5d5e87 | Alex Williamson | ssize_t ret; |
1807 | 4b5d5e87 | Alex Williamson | |
1808 | 4b5d5e87 | Alex Williamson | ret = pread(vdev->fd, &phys_val, len, vdev->config_offset + addr); |
1809 | 4b5d5e87 | Alex Williamson | if (ret != len) {
|
1810 | 312fd5f2 | Markus Armbruster | error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m",
|
1811 | 65501a74 | Alex Williamson | __func__, vdev->host.domain, vdev->host.bus, |
1812 | 65501a74 | Alex Williamson | vdev->host.slot, vdev->host.function, addr, len); |
1813 | 65501a74 | Alex Williamson | return -errno;
|
1814 | 65501a74 | Alex Williamson | } |
1815 | 4b5d5e87 | Alex Williamson | phys_val = le32_to_cpu(phys_val); |
1816 | 65501a74 | Alex Williamson | } |
1817 | 65501a74 | Alex Williamson | |
1818 | 4b5d5e87 | Alex Williamson | val = (emu_val & emu_bits) | (phys_val & ~emu_bits); |
1819 | 65501a74 | Alex Williamson | |
1820 | 65501a74 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
|
1821 | 65501a74 | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
1822 | 65501a74 | Alex Williamson | vdev->host.function, addr, len, val); |
1823 | 65501a74 | Alex Williamson | |
1824 | 65501a74 | Alex Williamson | return val;
|
1825 | 65501a74 | Alex Williamson | } |
1826 | 65501a74 | Alex Williamson | |
/*
 * Write @len bytes of config space at @addr.  The write always goes to
 * the physical device first (the kernel filters out what may not be
 * written), then to QEMU's emulated config space.  The emulated write is
 * what flips msi_enabled()/msix_enabled(), so the before/after comparison
 * around it detects guest enable/disable transitions and switches the
 * vfio interrupt mode accordingly.
 */
static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
                                  uint32_t val, int len)
{
    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
    uint32_t val_le = cpu_to_le32(val);

    DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, 0x%x, len=0x%x)\n", __func__,
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, addr, val, len);

    /* Write everything to VFIO, let it filter out what we can't write */
    if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != len) {
        /* Write failure is logged but not fatal; emulated state continues */
        error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m",
                     __func__, vdev->host.domain, vdev->host.bus,
                     vdev->host.slot, vdev->host.function, addr, val, len);
    }

    /* MSI/MSI-X Enabling/Disabling */
    if (pdev->cap_present & QEMU_PCI_CAP_MSI &&
        ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size)) {
        /* Write touches the MSI capability: watch for an enable flip */
        int is_enabled, was_enabled = msi_enabled(pdev);

        pci_default_write_config(pdev, addr, val, len);

        is_enabled = msi_enabled(pdev);

        if (!was_enabled && is_enabled) {
            vfio_enable_msi(vdev);
        } else if (was_enabled && !is_enabled) {
            vfio_disable_msi(vdev);
        }
    } else if (pdev->cap_present & QEMU_PCI_CAP_MSIX &&
               ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) {
        /* Same transition detection for the MSI-X capability */
        int is_enabled, was_enabled = msix_enabled(pdev);

        pci_default_write_config(pdev, addr, val, len);

        is_enabled = msix_enabled(pdev);

        if (!was_enabled && is_enabled) {
            vfio_enable_msix(vdev);
        } else if (was_enabled && !is_enabled) {
            vfio_disable_msix(vdev);
        }
    } else {
        /* Write everything to QEMU to keep emulated bits correct */
        pci_default_write_config(pdev, addr, val, len);
    }
}
1876 | 65501a74 | Alex Williamson | |
1877 | 65501a74 | Alex Williamson | /*
|
1878 | 65501a74 | Alex Williamson | * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
|
1879 | 65501a74 | Alex Williamson | */
|
1880 | af6bc27e | Alex Williamson | static int vfio_dma_unmap(VFIOContainer *container, |
1881 | a8170e5e | Avi Kivity | hwaddr iova, ram_addr_t size) |
1882 | af6bc27e | Alex Williamson | { |
1883 | af6bc27e | Alex Williamson | struct vfio_iommu_type1_dma_unmap unmap = {
|
1884 | af6bc27e | Alex Williamson | .argsz = sizeof(unmap),
|
1885 | af6bc27e | Alex Williamson | .flags = 0,
|
1886 | af6bc27e | Alex Williamson | .iova = iova, |
1887 | af6bc27e | Alex Williamson | .size = size, |
1888 | af6bc27e | Alex Williamson | }; |
1889 | af6bc27e | Alex Williamson | |
1890 | af6bc27e | Alex Williamson | if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
|
1891 | af6bc27e | Alex Williamson | DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno);
|
1892 | af6bc27e | Alex Williamson | return -errno;
|
1893 | af6bc27e | Alex Williamson | } |
1894 | af6bc27e | Alex Williamson | |
1895 | af6bc27e | Alex Williamson | return 0; |
1896 | af6bc27e | Alex Williamson | } |
1897 | af6bc27e | Alex Williamson | |
1898 | a8170e5e | Avi Kivity | static int vfio_dma_map(VFIOContainer *container, hwaddr iova, |
1899 | 65501a74 | Alex Williamson | ram_addr_t size, void *vaddr, bool readonly) |
1900 | 65501a74 | Alex Williamson | { |
1901 | 65501a74 | Alex Williamson | struct vfio_iommu_type1_dma_map map = {
|
1902 | 65501a74 | Alex Williamson | .argsz = sizeof(map),
|
1903 | 65501a74 | Alex Williamson | .flags = VFIO_DMA_MAP_FLAG_READ, |
1904 | 5976cdd5 | Alex Williamson | .vaddr = (__u64)(uintptr_t)vaddr, |
1905 | 65501a74 | Alex Williamson | .iova = iova, |
1906 | 65501a74 | Alex Williamson | .size = size, |
1907 | 65501a74 | Alex Williamson | }; |
1908 | 65501a74 | Alex Williamson | |
1909 | 65501a74 | Alex Williamson | if (!readonly) {
|
1910 | 65501a74 | Alex Williamson | map.flags |= VFIO_DMA_MAP_FLAG_WRITE; |
1911 | 65501a74 | Alex Williamson | } |
1912 | 65501a74 | Alex Williamson | |
1913 | 12af1344 | Alex Williamson | /*
|
1914 | 12af1344 | Alex Williamson | * Try the mapping, if it fails with EBUSY, unmap the region and try
|
1915 | 12af1344 | Alex Williamson | * again. This shouldn't be necessary, but we sometimes see it in
|
1916 | 12af1344 | Alex Williamson | * the the VGA ROM space.
|
1917 | 12af1344 | Alex Williamson | */
|
1918 | 12af1344 | Alex Williamson | if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || |
1919 | 12af1344 | Alex Williamson | (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
|
1920 | 12af1344 | Alex Williamson | ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
|
1921 | 12af1344 | Alex Williamson | return 0; |
1922 | 65501a74 | Alex Williamson | } |
1923 | 65501a74 | Alex Williamson | |
1924 | 12af1344 | Alex Williamson | DPRINTF("VFIO_MAP_DMA: %d\n", -errno);
|
1925 | 12af1344 | Alex Williamson | return -errno;
|
1926 | 65501a74 | Alex Williamson | } |
1927 | 65501a74 | Alex Williamson | |
/* Only RAM-backed sections can be DMA-mapped; everything else is skipped */
static bool vfio_listener_skipped_section(MemoryRegionSection *section)
{
    return !memory_region_is_ram(section->mr);
}
1932 | 65501a74 | Alex Williamson | |
1933 | 65501a74 | Alex Williamson | static void vfio_listener_region_add(MemoryListener *listener, |
1934 | 65501a74 | Alex Williamson | MemoryRegionSection *section) |
1935 | 65501a74 | Alex Williamson | { |
1936 | 65501a74 | Alex Williamson | VFIOContainer *container = container_of(listener, VFIOContainer, |
1937 | 65501a74 | Alex Williamson | iommu_data.listener); |
1938 | a8170e5e | Avi Kivity | hwaddr iova, end; |
1939 | 65501a74 | Alex Williamson | void *vaddr;
|
1940 | 65501a74 | Alex Williamson | int ret;
|
1941 | 65501a74 | Alex Williamson | |
1942 | 06d985f5 | Avi Kivity | assert(!memory_region_is_iommu(section->mr)); |
1943 | 06d985f5 | Avi Kivity | |
1944 | 65501a74 | Alex Williamson | if (vfio_listener_skipped_section(section)) {
|
1945 | 82ca8912 | Alex Williamson | DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n", |
1946 | 65501a74 | Alex Williamson | section->offset_within_address_space, |
1947 | 65501a74 | Alex Williamson | section->offset_within_address_space + section->size - 1);
|
1948 | 65501a74 | Alex Williamson | return;
|
1949 | 65501a74 | Alex Williamson | } |
1950 | 65501a74 | Alex Williamson | |
1951 | 65501a74 | Alex Williamson | if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
|
1952 | 65501a74 | Alex Williamson | (section->offset_within_region & ~TARGET_PAGE_MASK))) { |
1953 | 312fd5f2 | Markus Armbruster | error_report("%s received unaligned region", __func__);
|
1954 | 65501a74 | Alex Williamson | return;
|
1955 | 65501a74 | Alex Williamson | } |
1956 | 65501a74 | Alex Williamson | |
1957 | 65501a74 | Alex Williamson | iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); |
1958 | 052e87b0 | Paolo Bonzini | end = (section->offset_within_address_space + int128_get64(section->size)) & |
1959 | 65501a74 | Alex Williamson | TARGET_PAGE_MASK; |
1960 | 65501a74 | Alex Williamson | |
1961 | 65501a74 | Alex Williamson | if (iova >= end) {
|
1962 | 65501a74 | Alex Williamson | return;
|
1963 | 65501a74 | Alex Williamson | } |
1964 | 65501a74 | Alex Williamson | |
1965 | 65501a74 | Alex Williamson | vaddr = memory_region_get_ram_ptr(section->mr) + |
1966 | 65501a74 | Alex Williamson | section->offset_within_region + |
1967 | 65501a74 | Alex Williamson | (iova - section->offset_within_address_space); |
1968 | 65501a74 | Alex Williamson | |
1969 | 82ca8912 | Alex Williamson | DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n", |
1970 | 65501a74 | Alex Williamson | iova, end - 1, vaddr);
|
1971 | 65501a74 | Alex Williamson | |
1972 | 65501a74 | Alex Williamson | ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly); |
1973 | 65501a74 | Alex Williamson | if (ret) {
|
1974 | a8170e5e | Avi Kivity | error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " |
1975 | 312fd5f2 | Markus Armbruster | "0x%"HWADDR_PRIx", %p) = %d (%m)", |
1976 | 65501a74 | Alex Williamson | container, iova, end - iova, vaddr, ret); |
1977 | 65501a74 | Alex Williamson | } |
1978 | 65501a74 | Alex Williamson | } |
1979 | 65501a74 | Alex Williamson | |
1980 | 65501a74 | Alex Williamson | static void vfio_listener_region_del(MemoryListener *listener, |
1981 | 65501a74 | Alex Williamson | MemoryRegionSection *section) |
1982 | 65501a74 | Alex Williamson | { |
1983 | 65501a74 | Alex Williamson | VFIOContainer *container = container_of(listener, VFIOContainer, |
1984 | 65501a74 | Alex Williamson | iommu_data.listener); |
1985 | a8170e5e | Avi Kivity | hwaddr iova, end; |
1986 | 65501a74 | Alex Williamson | int ret;
|
1987 | 65501a74 | Alex Williamson | |
1988 | 65501a74 | Alex Williamson | if (vfio_listener_skipped_section(section)) {
|
1989 | 82ca8912 | Alex Williamson | DPRINTF("SKIPPING region_del %"HWADDR_PRIx" - %"PRIx64"\n", |
1990 | 65501a74 | Alex Williamson | section->offset_within_address_space, |
1991 | 65501a74 | Alex Williamson | section->offset_within_address_space + section->size - 1);
|
1992 | 65501a74 | Alex Williamson | return;
|
1993 | 65501a74 | Alex Williamson | } |
1994 | 65501a74 | Alex Williamson | |
1995 | 65501a74 | Alex Williamson | if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
|
1996 | 65501a74 | Alex Williamson | (section->offset_within_region & ~TARGET_PAGE_MASK))) { |
1997 | 312fd5f2 | Markus Armbruster | error_report("%s received unaligned region", __func__);
|
1998 | 65501a74 | Alex Williamson | return;
|
1999 | 65501a74 | Alex Williamson | } |
2000 | 65501a74 | Alex Williamson | |
2001 | 65501a74 | Alex Williamson | iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); |
2002 | 052e87b0 | Paolo Bonzini | end = (section->offset_within_address_space + int128_get64(section->size)) & |
2003 | 65501a74 | Alex Williamson | TARGET_PAGE_MASK; |
2004 | 65501a74 | Alex Williamson | |
2005 | 65501a74 | Alex Williamson | if (iova >= end) {
|
2006 | 65501a74 | Alex Williamson | return;
|
2007 | 65501a74 | Alex Williamson | } |
2008 | 65501a74 | Alex Williamson | |
2009 | 82ca8912 | Alex Williamson | DPRINTF("region_del %"HWADDR_PRIx" - %"HWADDR_PRIx"\n", |
2010 | 65501a74 | Alex Williamson | iova, end - 1);
|
2011 | 65501a74 | Alex Williamson | |
2012 | 65501a74 | Alex Williamson | ret = vfio_dma_unmap(container, iova, end - iova); |
2013 | 65501a74 | Alex Williamson | if (ret) {
|
2014 | a8170e5e | Avi Kivity | error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " |
2015 | 312fd5f2 | Markus Armbruster | "0x%"HWADDR_PRIx") = %d (%m)", |
2016 | 65501a74 | Alex Williamson | container, iova, end - iova, ret); |
2017 | 65501a74 | Alex Williamson | } |
2018 | 65501a74 | Alex Williamson | } |
2019 | 65501a74 | Alex Williamson | |
/* Callbacks invoked for every guest-visible memory topology change */
static MemoryListener vfio_memory_listener = {
    .region_add = vfio_listener_region_add,
    .region_del = vfio_listener_region_del,
};
2024 | 65501a74 | Alex Williamson | |
/* Detach the container's MemoryListener; no further map/unmap callbacks */
static void vfio_listener_release(VFIOContainer *container)
{
    memory_listener_unregister(&container->iommu_data.listener);
}
2029 | 65501a74 | Alex Williamson | |
2030 | 65501a74 | Alex Williamson | /*
|
2031 | 65501a74 | Alex Williamson | * Interrupt setup
|
2032 | 65501a74 | Alex Williamson | */
|
2033 | 65501a74 | Alex Williamson | static void vfio_disable_interrupts(VFIODevice *vdev) |
2034 | 65501a74 | Alex Williamson | { |
2035 | 65501a74 | Alex Williamson | switch (vdev->interrupt) {
|
2036 | 65501a74 | Alex Williamson | case VFIO_INT_INTx:
|
2037 | 65501a74 | Alex Williamson | vfio_disable_intx(vdev); |
2038 | 65501a74 | Alex Williamson | break;
|
2039 | 65501a74 | Alex Williamson | case VFIO_INT_MSI:
|
2040 | fd704adc | Alex Williamson | vfio_disable_msi(vdev); |
2041 | 65501a74 | Alex Williamson | break;
|
2042 | 65501a74 | Alex Williamson | case VFIO_INT_MSIX:
|
2043 | fd704adc | Alex Williamson | vfio_disable_msix(vdev); |
2044 | 65501a74 | Alex Williamson | break;
|
2045 | 65501a74 | Alex Williamson | } |
2046 | 65501a74 | Alex Williamson | } |
2047 | 65501a74 | Alex Williamson | |
/*
 * Parse the device's physical MSI capability and mirror it with a QEMU
 * emulated MSI capability at the same config space offset.  Returns 0 on
 * success (including when QEMU itself lacks MSI support) or -errno.
 */
static int vfio_setup_msi(VFIODevice *vdev, int pos)
{
    uint16_t ctrl;
    bool msi_64bit, msi_maskbit;
    int ret, entries;

    /* Read the physical Message Control register through the vfio fd */
    if (pread(vdev->fd, &ctrl, sizeof(ctrl),
              vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
        return -errno;
    }
    ctrl = le16_to_cpu(ctrl);

    msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT);
    msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT);
    /* Multiple Message Capable field encodes log2 of supported vectors */
    entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1);

    DPRINTF("%04x:%02x:%02x.%x PCI MSI CAP @0x%x\n", vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function, pos);

    ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit);
    if (ret < 0) {
        if (ret == -ENOTSUP) {
            /* MSI unsupported in this configuration; not a fatal error */
            return 0;
        }
        error_report("vfio: msi_init failed");
        return ret;
    }
    /* Cap length: 0xa base, +0xa for per-vector masking, +0x4 for 64bit */
    vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);

    return 0;
}
2079 | 65501a74 | Alex Williamson | |
2080 | 65501a74 | Alex Williamson | /*
|
2081 | 65501a74 | Alex Williamson | * We don't have any control over how pci_add_capability() inserts
|
2082 | 65501a74 | Alex Williamson | * capabilities into the chain. In order to setup MSI-X we need a
|
2083 | 65501a74 | Alex Williamson | * MemoryRegion for the BAR. In order to setup the BAR and not
|
2084 | 65501a74 | Alex Williamson | * attempt to mmap the MSI-X table area, which VFIO won't allow, we
|
2085 | 65501a74 | Alex Williamson | * need to first look for where the MSI-X table lives. So we
|
2086 | 65501a74 | Alex Williamson | * unfortunately split MSI-X setup across two functions.
|
2087 | 65501a74 | Alex Williamson | */
|
/*
 * First half of MSI-X setup (see comment above): locate the MSI-X
 * capability and record which BAR/offset hold the vector table and PBA
 * so BAR mapping can avoid mmap'ing the table area.  Allocates
 * vdev->msix on success.  Returns 0 (also when no MSI-X cap exists)
 * or -errno on a config space read failure.
 */
static int vfio_early_setup_msix(VFIODevice *vdev)
{
    uint8_t pos;
    uint16_t ctrl;
    uint32_t table, pba;

    pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
    if (!pos) {
        /* Device has no MSI-X capability; nothing to record */
        return 0;
    }

    /* Read Message Control, Table, and PBA registers from the device */
    if (pread(vdev->fd, &ctrl, sizeof(ctrl),
              vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
        return -errno;
    }

    if (pread(vdev->fd, &table, sizeof(table),
              vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) {
        return -errno;
    }

    if (pread(vdev->fd, &pba, sizeof(pba),
              vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
        return -errno;
    }

    ctrl = le16_to_cpu(ctrl);
    table = le32_to_cpu(table);
    pba = le32_to_cpu(pba);

    /* Decode BAR indicator (BIR) and offset for both table and PBA */
    vdev->msix = g_malloc0(sizeof(*(vdev->msix)));
    vdev->msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
    vdev->msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
    vdev->msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
    vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
    /* Table Size field is encoded as N-1 */
    vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;

    DPRINTF("%04x:%02x:%02x.%x "
            "PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d\n",
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function, pos, vdev->msix->table_bar,
            vdev->msix->table_offset, vdev->msix->entries);

    return 0;
}
2133 | 65501a74 | Alex Williamson | |
2134 | 65501a74 | Alex Williamson | static int vfio_setup_msix(VFIODevice *vdev, int pos) |
2135 | 65501a74 | Alex Williamson | { |
2136 | 65501a74 | Alex Williamson | int ret;
|
2137 | 65501a74 | Alex Williamson | |
2138 | 65501a74 | Alex Williamson | ret = msix_init(&vdev->pdev, vdev->msix->entries, |
2139 | 65501a74 | Alex Williamson | &vdev->bars[vdev->msix->table_bar].mem, |
2140 | 65501a74 | Alex Williamson | vdev->msix->table_bar, vdev->msix->table_offset, |
2141 | 65501a74 | Alex Williamson | &vdev->bars[vdev->msix->pba_bar].mem, |
2142 | 65501a74 | Alex Williamson | vdev->msix->pba_bar, vdev->msix->pba_offset, pos); |
2143 | 65501a74 | Alex Williamson | if (ret < 0) { |
2144 | e43b9a5a | Alex Williamson | if (ret == -ENOTSUP) {
|
2145 | e43b9a5a | Alex Williamson | return 0; |
2146 | e43b9a5a | Alex Williamson | } |
2147 | 312fd5f2 | Markus Armbruster | error_report("vfio: msix_init failed");
|
2148 | 65501a74 | Alex Williamson | return ret;
|
2149 | 65501a74 | Alex Williamson | } |
2150 | 65501a74 | Alex Williamson | |
2151 | 65501a74 | Alex Williamson | return 0; |
2152 | 65501a74 | Alex Williamson | } |
2153 | 65501a74 | Alex Williamson | |
/*
 * Remove the emulated MSI and, if present, MSI-X capabilities added by
 * vfio_setup_msi()/vfio_setup_msix().
 */
static void vfio_teardown_msi(VFIODevice *vdev)
{
    msi_uninit(&vdev->pdev);

    if (vdev->msix) {
        msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem,
                    &vdev->bars[vdev->msix->pba_bar].mem);
    }
}
2163 | 65501a74 | Alex Williamson | |
2164 | 65501a74 | Alex Williamson | /*
|
2165 | 65501a74 | Alex Williamson | * Resource setup
|
2166 | 65501a74 | Alex Williamson | */
|
2167 | 65501a74 | Alex Williamson | static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled) |
2168 | 65501a74 | Alex Williamson | { |
2169 | 65501a74 | Alex Williamson | int i;
|
2170 | 65501a74 | Alex Williamson | |
2171 | 65501a74 | Alex Williamson | for (i = 0; i < PCI_ROM_SLOT; i++) { |
2172 | 65501a74 | Alex Williamson | VFIOBAR *bar = &vdev->bars[i]; |
2173 | 65501a74 | Alex Williamson | |
2174 | 65501a74 | Alex Williamson | if (!bar->size) {
|
2175 | 65501a74 | Alex Williamson | continue;
|
2176 | 65501a74 | Alex Williamson | } |
2177 | 65501a74 | Alex Williamson | |
2178 | 65501a74 | Alex Williamson | memory_region_set_enabled(&bar->mmap_mem, enabled); |
2179 | 65501a74 | Alex Williamson | if (vdev->msix && vdev->msix->table_bar == i) {
|
2180 | 65501a74 | Alex Williamson | memory_region_set_enabled(&vdev->msix->mmap_mem, enabled); |
2181 | 65501a74 | Alex Williamson | } |
2182 | 65501a74 | Alex Williamson | } |
2183 | 65501a74 | Alex Williamson | } |
2184 | 65501a74 | Alex Williamson | |
/*
 * Undo vfio_map_bar() for BAR @nr: tear down quirks, remove and munmap
 * the fast-path sub-regions (including the MSI-X high window when this
 * BAR holds the vector table), then destroy the slow-path region.
 */
static void vfio_unmap_bar(VFIODevice *vdev, int nr)
{
    VFIOBAR *bar = &vdev->bars[nr];

    /* Unimplemented BARs and 64bit upper halves were never mapped */
    if (!bar->size) {
        return;
    }

    vfio_bar_quirk_teardown(vdev, nr);

    memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
    munmap(bar->mmap, memory_region_size(&bar->mmap_mem));

    if (vdev->msix && vdev->msix->table_bar == nr) {
        memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
        munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
    }

    memory_region_destroy(&bar->mem);
}
2205 | 65501a74 | Alex Williamson | |
/*
 * Attempt to mmap a window of a BAR and add it as a RAM sub-region
 * (@submem) of the slow-path region (@mem) at @offset.  When mmap is
 * compiled out, the region advertises no mmap support, @size is zero,
 * or the mmap itself fails, a zero-sized placeholder region is inserted
 * instead so that teardown can treat every BAR uniformly.
 * Returns 0 on success or -errno if the mmap failed.
 */
static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem,
                         void **map, size_t size, off_t offset,
                         const char *name)
{
    int ret = 0;

    if (VFIO_ALLOW_MMAP && size && bar->flags & VFIO_REGION_INFO_FLAG_MMAP) {
        int prot = 0;

        /* Map with exactly the access the region advertises */
        if (bar->flags & VFIO_REGION_INFO_FLAG_READ) {
            prot |= PROT_READ;
        }

        if (bar->flags & VFIO_REGION_INFO_FLAG_WRITE) {
            prot |= PROT_WRITE;
        }

        /* The window lives at fd_offset + offset within the vfio fd */
        *map = mmap(NULL, size, prot, MAP_SHARED,
                    bar->fd, bar->fd_offset + offset);
        if (*map == MAP_FAILED) {
            *map = NULL;
            ret = -errno;
            /* Fall back to the zero-sized placeholder below */
            goto empty_region;
        }

        memory_region_init_ram_ptr(submem, name, size, *map);
    } else {
empty_region:
        /* Create a zero sized sub-region to make cleanup easy. */
        memory_region_init(submem, name, 0);
    }

    memory_region_add_subregion(mem, offset, submem);

    return ret;
}
2242 | 65501a74 | Alex Williamson | |
/*
 * Register BAR @nr with QEMU.  A slow read/write MemoryRegion backs the
 * entire BAR; mmap'd fast-path sub-regions are layered on top where
 * permitted.  The MSI-X vector table can't be mmap'd (VFIO forbids it),
 * so a BAR containing the table gets at most two mmap windows: below
 * and above the page-aligned table range.
 */
static void vfio_map_bar(VFIODevice *vdev, int nr)
{
    VFIOBAR *bar = &vdev->bars[nr];
    unsigned size = bar->size;
    char name[64];
    uint32_t pci_bar;
    uint8_t type;
    int ret;

    /* Skip both unimplemented BARs and the upper half of 64bit BARS. */
    if (!size) {
        return;
    }

    snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
             vdev->host.function, nr);

    /* Determine what type of BAR this is for registration */
    ret = pread(vdev->fd, &pci_bar, sizeof(pci_bar),
                vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
    if (ret != sizeof(pci_bar)) {
        error_report("vfio: Failed to read BAR %d (%m)", nr);
        return;
    }

    /* Keep only the type bits (I/O vs memory, width, prefetchable) */
    pci_bar = le32_to_cpu(pci_bar);
    type = pci_bar & (pci_bar & PCI_BASE_ADDRESS_SPACE_IO ?
                      ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK);

    /* A "slow" read/write mapping underlies all BARs */
    memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size);
    pci_register_bar(&vdev->pdev, nr, type, &bar->mem);

    /*
     * We can't mmap areas overlapping the MSIX vector table, so we
     * potentially insert a direct-mapped subregion before and after it.
     */
    if (vdev->msix && vdev->msix->table_bar == nr) {
        /* Clamp the low window to the page-aligned start of the table */
        size = vdev->msix->table_offset & TARGET_PAGE_MASK;
    }

    strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
    if (vfio_mmap_bar(bar, &bar->mem,
                      &bar->mmap_mem, &bar->mmap, size, 0, name)) {
        error_report("%s unsupported. Performance may be slow", name);
    }

    if (vdev->msix && vdev->msix->table_bar == nr) {
        unsigned start;

        /* High window begins at the first page past the vector table */
        start = TARGET_PAGE_ALIGN(vdev->msix->table_offset +
                                  (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));

        size = start < bar->size ? bar->size - start : 0;
        strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
        /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
        if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem,
                          &vdev->msix->mmap, size, start, name)) {
            error_report("%s unsupported. Performance may be slow", name);
        }
    }

    vfio_bar_quirk_setup(vdev, nr);
}
2308 | 65501a74 | Alex Williamson | |
/*
 * Map every standard BAR and, when the device exposes legacy VGA
 * ranges, register the three VGA windows (MMIO plus low/high I/O
 * ports) with their quirks.
 */
static void vfio_map_bars(VFIODevice *vdev)
{
    int i;

    for (i = 0; i < PCI_ROM_SLOT; i++) {
        vfio_map_bar(vdev, i);
    }

    if (vdev->has_vga) {
        /* Slow-path I/O regions for the legacy VGA ranges */
        memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem,
                              &vfio_vga_ops,
                              &vdev->vga.region[QEMU_PCI_VGA_MEM],
                              "vfio-vga-mmio@0xa0000",
                              QEMU_PCI_VGA_MEM_SIZE);
        memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem,
                              &vfio_vga_ops,
                              &vdev->vga.region[QEMU_PCI_VGA_IO_LO],
                              "vfio-vga-io@0x3b0",
                              QEMU_PCI_VGA_IO_LO_SIZE);
        memory_region_init_io(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
                              &vfio_vga_ops,
                              &vdev->vga.region[QEMU_PCI_VGA_IO_HI],
                              "vfio-vga-io@0x3c0",
                              QEMU_PCI_VGA_IO_HI_SIZE);

        pci_register_vga(&vdev->pdev, &vdev->vga.region[QEMU_PCI_VGA_MEM].mem,
                         &vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem,
                         &vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem);
        vfio_vga_quirk_setup(vdev);
    }
}
2340 | 65501a74 | Alex Williamson | |
/*
 * Reverse vfio_map_bars(): unmap every BAR, then tear down the VGA
 * quirks, unregister the VGA windows, and destroy their regions.
 */
static void vfio_unmap_bars(VFIODevice *vdev)
{
    int i;

    for (i = 0; i < PCI_ROM_SLOT; i++) {
        vfio_unmap_bar(vdev, i);
    }

    if (vdev->has_vga) {
        vfio_vga_quirk_teardown(vdev);
        pci_unregister_vga(&vdev->pdev);
        memory_region_destroy(&vdev->vga.region[QEMU_PCI_VGA_MEM].mem);
        memory_region_destroy(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].mem);
        memory_region_destroy(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem);
    }
}
2357 | 65501a74 | Alex Williamson | |
2358 | 65501a74 | Alex Williamson | /*
|
2359 | 65501a74 | Alex Williamson | * General setup
|
2360 | 65501a74 | Alex Williamson | */
|
2361 | 65501a74 | Alex Williamson | static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
|
2362 | 65501a74 | Alex Williamson | { |
2363 | 65501a74 | Alex Williamson | uint8_t tmp, next = 0xff;
|
2364 | 65501a74 | Alex Williamson | |
2365 | 65501a74 | Alex Williamson | for (tmp = pdev->config[PCI_CAPABILITY_LIST]; tmp;
|
2366 | 65501a74 | Alex Williamson | tmp = pdev->config[tmp + 1]) {
|
2367 | 65501a74 | Alex Williamson | if (tmp > pos && tmp < next) {
|
2368 | 65501a74 | Alex Williamson | next = tmp; |
2369 | 65501a74 | Alex Williamson | } |
2370 | 65501a74 | Alex Williamson | } |
2371 | 65501a74 | Alex Williamson | |
2372 | 65501a74 | Alex Williamson | return next - pos;
|
2373 | 65501a74 | Alex Williamson | } |
2374 | 65501a74 | Alex Williamson | |
2375 | 96adc5c7 | Alex Williamson | static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask) |
2376 | 96adc5c7 | Alex Williamson | { |
2377 | 96adc5c7 | Alex Williamson | pci_set_word(buf, (pci_get_word(buf) & ~mask) | val); |
2378 | 96adc5c7 | Alex Williamson | } |
2379 | 96adc5c7 | Alex Williamson | |
/*
 * Emulate a 16-bit config field: set the guest-visible value under
 * @mask, clear the write mask so the guest can't change those bits, and
 * mark them emulated so accesses don't reach the physical device.
 * NOTE(review): vfio_set_word_bits() doesn't clip @val to @mask, so the
 * wmask line also raises bits outside @mask -- callers appear to use
 * whole-field masks; confirm before relying on partial masks.
 */
static void vfio_add_emulated_word(VFIODevice *vdev, int pos,
                                   uint16_t val, uint16_t mask)
{
    vfio_set_word_bits(vdev->pdev.config + pos, val, mask);
    vfio_set_word_bits(vdev->pdev.wmask + pos, ~mask, mask);
    vfio_set_word_bits(vdev->emulated_config_bits + pos, mask, mask);
}
2387 | 96adc5c7 | Alex Williamson | |
2388 | 96adc5c7 | Alex Williamson | static void vfio_set_long_bits(uint8_t *buf, uint32_t val, uint32_t mask) |
2389 | 96adc5c7 | Alex Williamson | { |
2390 | 96adc5c7 | Alex Williamson | pci_set_long(buf, (pci_get_long(buf) & ~mask) | val); |
2391 | 96adc5c7 | Alex Williamson | } |
2392 | 96adc5c7 | Alex Williamson | |
/*
 * 32-bit counterpart of vfio_add_emulated_word(): set the guest-visible
 * value under @mask, make those bits read-only via wmask, and flag them
 * as emulated so accesses bypass the physical device.
 */
static void vfio_add_emulated_long(VFIODevice *vdev, int pos,
                                   uint32_t val, uint32_t mask)
{
    vfio_set_long_bits(vdev->pdev.config + pos, val, mask);
    vfio_set_long_bits(vdev->pdev.wmask + pos, ~mask, mask);
    vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
}
2400 | 96adc5c7 | Alex Williamson | |
/*
 * Expose the device's PCI Express capability to the guest, adjusting
 * the device type and link fields so the capability makes sense on the
 * virtual bus it lands on (plain PCI, root complex, or downstream of a
 * root port).  Returns the capability offset (>= 0) on success, 0 when
 * the capability is intentionally dropped, or -EINVAL for device types
 * we refuse to assign.
 */
static int vfio_setup_pcie_cap(VFIODevice *vdev, int pos, uint8_t size)
{
    uint16_t flags;
    uint8_t type;

    flags = pci_get_word(vdev->pdev.config + pos + PCI_CAP_FLAGS);
    /* Device/Port Type lives in bits 7:4 of the Express flags */
    type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;

    /* Only endpoint-style devices are assignable; no switches/ports */
    if (type != PCI_EXP_TYPE_ENDPOINT &&
        type != PCI_EXP_TYPE_LEG_END &&
        type != PCI_EXP_TYPE_RC_END) {

        error_report("vfio: Assignment of PCIe type 0x%x "
                     "devices is not currently supported", type);
        return -EINVAL;
    }

    if (!pci_bus_is_express(vdev->pdev.bus)) {
        /*
         * Use express capability as-is on PCI bus.  It doesn't make much
         * sense to even expose, but some drivers (ex. tg3) depend on it
         * and guests don't seem to be particular about it.  We'll need
         * to revisit this or force express devices to express buses if we
         * ever expose an IOMMU to the guest.
         */
    } else if (pci_bus_is_root(vdev->pdev.bus)) {
        /*
         * On a Root Complex bus Endpoints become Root Complex Integrated
         * Endpoints, which changes the type and clears the LNK & LNK2 fields.
         */
        if (type == PCI_EXP_TYPE_ENDPOINT) {
            vfio_add_emulated_word(vdev, pos + PCI_CAP_FLAGS,
                                   PCI_EXP_TYPE_RC_END << 4,
                                   PCI_EXP_FLAGS_TYPE);

            /* Link Capabilities, Status, and Control goes away */
            if (size > PCI_EXP_LNKCTL) {
                vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP, 0, ~0);
                vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
                vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA, 0, ~0);

/* Fallback offsets for headers that predate these registers */
#ifndef PCI_EXP_LNKCAP2
#define PCI_EXP_LNKCAP2 44
#endif
#ifndef PCI_EXP_LNKSTA2
#define PCI_EXP_LNKSTA2 50
#endif
                /* Link 2 Capabilities, Status, and Control goes away */
                if (size > PCI_EXP_LNKCAP2) {
                    vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP2, 0, ~0);
                    vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL2, 0, ~0);
                    vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA2, 0, ~0);
                }
            }

        } else if (type == PCI_EXP_TYPE_LEG_END) {
            /*
             * Legacy endpoints don't belong on the root complex.  Windows
             * seems to be happier with devices if we skip the capability.
             */
            return 0;
        }

    } else {
        /*
         * Convert Root Complex Integrated Endpoints to regular endpoints.
         * These devices don't support LNK/LNK2 capabilities, so make them up.
         */
        if (type == PCI_EXP_TYPE_RC_END) {
            vfio_add_emulated_word(vdev, pos + PCI_CAP_FLAGS,
                                   PCI_EXP_TYPE_ENDPOINT << 4,
                                   PCI_EXP_FLAGS_TYPE);
            /* Fabricate a minimal x1 / 2.5GT/s link */
            vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP,
                                   PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25, ~0);
            vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
        }

        /* Mark the Link Status bits as emulated to allow virtual negotiation */
        vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA,
                               pci_get_word(vdev->pdev.config + pos +
                                            PCI_EXP_LNKSTA),
                               PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
    }

    pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size);
    if (pos >= 0) {
        /* Let QEMU's express helpers find the capability */
        vdev->pdev.exp.exp_cap = pos;
    }

    return pos;
}
2492 | 96adc5c7 | Alex Williamson | |
/*
 * Recursively mirror the device's standard PCI capability chain into the
 * emulated config space, one capability at a time starting at @pos.
 *
 * Returns 0 on success, or the negative errno propagated from the
 * per-capability setup helpers / pci_add_capability() on failure.
 */
static int vfio_add_std_cap(VFIODevice *vdev, uint8_t pos)
{
    PCIDevice *pdev = &vdev->pdev;
    uint8_t cap_id, next, size;
    int ret;

    /* Capability ID and the physical next pointer at this position */
    cap_id = pdev->config[pos];
    next = pdev->config[pos + 1];

    /*
     * If it becomes important to configure capabilities to their actual
     * size, use this as the default when it's something we don't recognize.
     * Since QEMU doesn't actually handle many of the config accesses,
     * exact size doesn't seem worthwhile.
     */
    size = vfio_std_cap_max_size(pdev, pos);

    /*
     * pci_add_capability always inserts the new capability at the head
     * of the chain.  Therefore to end up with a chain that matches the
     * physical device, we insert from the end by making this recursive.
     * This is also why we pre-calculate size above as cached config space
     * will be changed as we unwind the stack.
     */
    if (next) {
        ret = vfio_add_std_cap(vdev, next);
        if (ret) {
            return ret;
        }
    } else {
        /* Begin the rebuild, use QEMU emulated list bits */
        pdev->config[PCI_CAPABILITY_LIST] = 0;
        vdev->emulated_config_bits[PCI_CAPABILITY_LIST] = 0xff;
        vdev->emulated_config_bits[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
    }

    /* Use emulated next pointer to allow dropping caps */
    pci_set_byte(vdev->emulated_config_bits + pos + 1, 0xff);

    switch (cap_id) {
    case PCI_CAP_ID_MSI:
        ret = vfio_setup_msi(vdev, pos);
        break;
    case PCI_CAP_ID_EXP:
        ret = vfio_setup_pcie_cap(vdev, pos, size);
        break;
    case PCI_CAP_ID_MSIX:
        ret = vfio_setup_msix(vdev, pos);
        break;
    case PCI_CAP_ID_PM:
        vdev->pm_cap = pos;
        /* fallthrough - the PM capability is still added generically below */
    default:
        ret = pci_add_capability(pdev, cap_id, pos, size);
        break;
    }

    if (ret < 0) {
        error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
                     "0x%x[0x%x]@0x%x: %d", vdev->host.domain,
                     vdev->host.bus, vdev->host.slot, vdev->host.function,
                     cap_id, size, pos, ret);
        return ret;
    }

    return 0;
}
2559 | 65501a74 | Alex Williamson | |
2560 | 65501a74 | Alex Williamson | static int vfio_add_capabilities(VFIODevice *vdev) |
2561 | 65501a74 | Alex Williamson | { |
2562 | 65501a74 | Alex Williamson | PCIDevice *pdev = &vdev->pdev; |
2563 | 65501a74 | Alex Williamson | |
2564 | 65501a74 | Alex Williamson | if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
|
2565 | 65501a74 | Alex Williamson | !pdev->config[PCI_CAPABILITY_LIST]) { |
2566 | 65501a74 | Alex Williamson | return 0; /* Nothing to add */ |
2567 | 65501a74 | Alex Williamson | } |
2568 | 65501a74 | Alex Williamson | |
2569 | 65501a74 | Alex Williamson | return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
|
2570 | 65501a74 | Alex Williamson | } |
2571 | 65501a74 | Alex Williamson | |
2572 | 65501a74 | Alex Williamson | static int vfio_load_rom(VFIODevice *vdev) |
2573 | 65501a74 | Alex Williamson | { |
2574 | 65501a74 | Alex Williamson | uint64_t size = vdev->rom_size; |
2575 | 65501a74 | Alex Williamson | char name[32]; |
2576 | 65501a74 | Alex Williamson | off_t off = 0, voff = vdev->rom_offset;
|
2577 | 65501a74 | Alex Williamson | ssize_t bytes; |
2578 | 65501a74 | Alex Williamson | void *ptr;
|
2579 | 65501a74 | Alex Williamson | |
2580 | 65501a74 | Alex Williamson | /* If loading ROM from file, pci handles it */
|
2581 | 65501a74 | Alex Williamson | if (vdev->pdev.romfile || !vdev->pdev.rom_bar || !size) {
|
2582 | 65501a74 | Alex Williamson | return 0; |
2583 | 65501a74 | Alex Williamson | } |
2584 | 65501a74 | Alex Williamson | |
2585 | 65501a74 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
|
2586 | 65501a74 | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
2587 | 65501a74 | Alex Williamson | |
2588 | 65501a74 | Alex Williamson | snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom", |
2589 | 65501a74 | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
2590 | 65501a74 | Alex Williamson | vdev->host.function); |
2591 | 65501a74 | Alex Williamson | memory_region_init_ram(&vdev->pdev.rom, name, size); |
2592 | 65501a74 | Alex Williamson | ptr = memory_region_get_ram_ptr(&vdev->pdev.rom); |
2593 | 65501a74 | Alex Williamson | memset(ptr, 0xff, size);
|
2594 | 65501a74 | Alex Williamson | |
2595 | 65501a74 | Alex Williamson | while (size) {
|
2596 | 65501a74 | Alex Williamson | bytes = pread(vdev->fd, ptr + off, size, voff + off); |
2597 | 65501a74 | Alex Williamson | if (bytes == 0) { |
2598 | 65501a74 | Alex Williamson | break; /* expect that we could get back less than the ROM BAR */ |
2599 | 65501a74 | Alex Williamson | } else if (bytes > 0) { |
2600 | 65501a74 | Alex Williamson | off += bytes; |
2601 | 65501a74 | Alex Williamson | size -= bytes; |
2602 | 65501a74 | Alex Williamson | } else {
|
2603 | 65501a74 | Alex Williamson | if (errno == EINTR || errno == EAGAIN) {
|
2604 | 65501a74 | Alex Williamson | continue;
|
2605 | 65501a74 | Alex Williamson | } |
2606 | 312fd5f2 | Markus Armbruster | error_report("vfio: Error reading device ROM: %m");
|
2607 | 65501a74 | Alex Williamson | memory_region_destroy(&vdev->pdev.rom); |
2608 | 65501a74 | Alex Williamson | return -errno;
|
2609 | 65501a74 | Alex Williamson | } |
2610 | 65501a74 | Alex Williamson | } |
2611 | 65501a74 | Alex Williamson | |
2612 | 65501a74 | Alex Williamson | pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, 0, &vdev->pdev.rom);
|
2613 | 65501a74 | Alex Williamson | vdev->pdev.has_rom = true;
|
2614 | 65501a74 | Alex Williamson | return 0; |
2615 | 65501a74 | Alex Williamson | } |
2616 | 65501a74 | Alex Williamson | |
2617 | 65501a74 | Alex Williamson | static int vfio_connect_container(VFIOGroup *group) |
2618 | 65501a74 | Alex Williamson | { |
2619 | 65501a74 | Alex Williamson | VFIOContainer *container; |
2620 | 65501a74 | Alex Williamson | int ret, fd;
|
2621 | 65501a74 | Alex Williamson | |
2622 | 65501a74 | Alex Williamson | if (group->container) {
|
2623 | 65501a74 | Alex Williamson | return 0; |
2624 | 65501a74 | Alex Williamson | } |
2625 | 65501a74 | Alex Williamson | |
2626 | 65501a74 | Alex Williamson | QLIST_FOREACH(container, &container_list, next) { |
2627 | 65501a74 | Alex Williamson | if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
|
2628 | 65501a74 | Alex Williamson | group->container = container; |
2629 | 65501a74 | Alex Williamson | QLIST_INSERT_HEAD(&container->group_list, group, container_next); |
2630 | 65501a74 | Alex Williamson | return 0; |
2631 | 65501a74 | Alex Williamson | } |
2632 | 65501a74 | Alex Williamson | } |
2633 | 65501a74 | Alex Williamson | |
2634 | 65501a74 | Alex Williamson | fd = qemu_open("/dev/vfio/vfio", O_RDWR);
|
2635 | 65501a74 | Alex Williamson | if (fd < 0) { |
2636 | 312fd5f2 | Markus Armbruster | error_report("vfio: failed to open /dev/vfio/vfio: %m");
|
2637 | 65501a74 | Alex Williamson | return -errno;
|
2638 | 65501a74 | Alex Williamson | } |
2639 | 65501a74 | Alex Williamson | |
2640 | 65501a74 | Alex Williamson | ret = ioctl(fd, VFIO_GET_API_VERSION); |
2641 | 65501a74 | Alex Williamson | if (ret != VFIO_API_VERSION) {
|
2642 | 65501a74 | Alex Williamson | error_report("vfio: supported vfio version: %d, "
|
2643 | 312fd5f2 | Markus Armbruster | "reported version: %d", VFIO_API_VERSION, ret);
|
2644 | 65501a74 | Alex Williamson | close(fd); |
2645 | 65501a74 | Alex Williamson | return -EINVAL;
|
2646 | 65501a74 | Alex Williamson | } |
2647 | 65501a74 | Alex Williamson | |
2648 | 65501a74 | Alex Williamson | container = g_malloc0(sizeof(*container));
|
2649 | 65501a74 | Alex Williamson | container->fd = fd; |
2650 | 65501a74 | Alex Williamson | |
2651 | 65501a74 | Alex Williamson | if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
|
2652 | 65501a74 | Alex Williamson | ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); |
2653 | 65501a74 | Alex Williamson | if (ret) {
|
2654 | 312fd5f2 | Markus Armbruster | error_report("vfio: failed to set group container: %m");
|
2655 | 65501a74 | Alex Williamson | g_free(container); |
2656 | 65501a74 | Alex Williamson | close(fd); |
2657 | 65501a74 | Alex Williamson | return -errno;
|
2658 | 65501a74 | Alex Williamson | } |
2659 | 65501a74 | Alex Williamson | |
2660 | 65501a74 | Alex Williamson | ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU); |
2661 | 65501a74 | Alex Williamson | if (ret) {
|
2662 | 312fd5f2 | Markus Armbruster | error_report("vfio: failed to set iommu for container: %m");
|
2663 | 65501a74 | Alex Williamson | g_free(container); |
2664 | 65501a74 | Alex Williamson | close(fd); |
2665 | 65501a74 | Alex Williamson | return -errno;
|
2666 | 65501a74 | Alex Williamson | } |
2667 | 65501a74 | Alex Williamson | |
2668 | 65501a74 | Alex Williamson | container->iommu_data.listener = vfio_memory_listener; |
2669 | 65501a74 | Alex Williamson | container->iommu_data.release = vfio_listener_release; |
2670 | 65501a74 | Alex Williamson | |
2671 | f6790af6 | Avi Kivity | memory_listener_register(&container->iommu_data.listener, &address_space_memory); |
2672 | 65501a74 | Alex Williamson | } else {
|
2673 | 312fd5f2 | Markus Armbruster | error_report("vfio: No available IOMMU models");
|
2674 | 65501a74 | Alex Williamson | g_free(container); |
2675 | 65501a74 | Alex Williamson | close(fd); |
2676 | 65501a74 | Alex Williamson | return -EINVAL;
|
2677 | 65501a74 | Alex Williamson | } |
2678 | 65501a74 | Alex Williamson | |
2679 | 65501a74 | Alex Williamson | QLIST_INIT(&container->group_list); |
2680 | 65501a74 | Alex Williamson | QLIST_INSERT_HEAD(&container_list, container, next); |
2681 | 65501a74 | Alex Williamson | |
2682 | 65501a74 | Alex Williamson | group->container = container; |
2683 | 65501a74 | Alex Williamson | QLIST_INSERT_HEAD(&container->group_list, group, container_next); |
2684 | 65501a74 | Alex Williamson | |
2685 | 65501a74 | Alex Williamson | return 0; |
2686 | 65501a74 | Alex Williamson | } |
2687 | 65501a74 | Alex Williamson | |
2688 | 65501a74 | Alex Williamson | static void vfio_disconnect_container(VFIOGroup *group) |
2689 | 65501a74 | Alex Williamson | { |
2690 | 65501a74 | Alex Williamson | VFIOContainer *container = group->container; |
2691 | 65501a74 | Alex Williamson | |
2692 | 65501a74 | Alex Williamson | if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
|
2693 | 312fd5f2 | Markus Armbruster | error_report("vfio: error disconnecting group %d from container",
|
2694 | 65501a74 | Alex Williamson | group->groupid); |
2695 | 65501a74 | Alex Williamson | } |
2696 | 65501a74 | Alex Williamson | |
2697 | 65501a74 | Alex Williamson | QLIST_REMOVE(group, container_next); |
2698 | 65501a74 | Alex Williamson | group->container = NULL;
|
2699 | 65501a74 | Alex Williamson | |
2700 | 65501a74 | Alex Williamson | if (QLIST_EMPTY(&container->group_list)) {
|
2701 | 65501a74 | Alex Williamson | if (container->iommu_data.release) {
|
2702 | 65501a74 | Alex Williamson | container->iommu_data.release(container); |
2703 | 65501a74 | Alex Williamson | } |
2704 | 65501a74 | Alex Williamson | QLIST_REMOVE(container, next); |
2705 | 65501a74 | Alex Williamson | DPRINTF("vfio_disconnect_container: close container->fd\n");
|
2706 | 65501a74 | Alex Williamson | close(container->fd); |
2707 | 65501a74 | Alex Williamson | g_free(container); |
2708 | 65501a74 | Alex Williamson | } |
2709 | 65501a74 | Alex Williamson | } |
2710 | 65501a74 | Alex Williamson | |
2711 | 65501a74 | Alex Williamson | static VFIOGroup *vfio_get_group(int groupid) |
2712 | 65501a74 | Alex Williamson | { |
2713 | 65501a74 | Alex Williamson | VFIOGroup *group; |
2714 | 65501a74 | Alex Williamson | char path[32]; |
2715 | 65501a74 | Alex Williamson | struct vfio_group_status status = { .argsz = sizeof(status) }; |
2716 | 65501a74 | Alex Williamson | |
2717 | 65501a74 | Alex Williamson | QLIST_FOREACH(group, &group_list, next) { |
2718 | 65501a74 | Alex Williamson | if (group->groupid == groupid) {
|
2719 | 65501a74 | Alex Williamson | return group;
|
2720 | 65501a74 | Alex Williamson | } |
2721 | 65501a74 | Alex Williamson | } |
2722 | 65501a74 | Alex Williamson | |
2723 | 65501a74 | Alex Williamson | group = g_malloc0(sizeof(*group));
|
2724 | 65501a74 | Alex Williamson | |
2725 | 65501a74 | Alex Williamson | snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); |
2726 | 65501a74 | Alex Williamson | group->fd = qemu_open(path, O_RDWR); |
2727 | 65501a74 | Alex Williamson | if (group->fd < 0) { |
2728 | 312fd5f2 | Markus Armbruster | error_report("vfio: error opening %s: %m", path);
|
2729 | 65501a74 | Alex Williamson | g_free(group); |
2730 | 65501a74 | Alex Williamson | return NULL; |
2731 | 65501a74 | Alex Williamson | } |
2732 | 65501a74 | Alex Williamson | |
2733 | 65501a74 | Alex Williamson | if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
|
2734 | 312fd5f2 | Markus Armbruster | error_report("vfio: error getting group status: %m");
|
2735 | 65501a74 | Alex Williamson | close(group->fd); |
2736 | 65501a74 | Alex Williamson | g_free(group); |
2737 | 65501a74 | Alex Williamson | return NULL; |
2738 | 65501a74 | Alex Williamson | } |
2739 | 65501a74 | Alex Williamson | |
2740 | 65501a74 | Alex Williamson | if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
|
2741 | 65501a74 | Alex Williamson | error_report("vfio: error, group %d is not viable, please ensure "
|
2742 | 65501a74 | Alex Williamson | "all devices within the iommu_group are bound to their "
|
2743 | 312fd5f2 | Markus Armbruster | "vfio bus driver.", groupid);
|
2744 | 65501a74 | Alex Williamson | close(group->fd); |
2745 | 65501a74 | Alex Williamson | g_free(group); |
2746 | 65501a74 | Alex Williamson | return NULL; |
2747 | 65501a74 | Alex Williamson | } |
2748 | 65501a74 | Alex Williamson | |
2749 | 65501a74 | Alex Williamson | group->groupid = groupid; |
2750 | 65501a74 | Alex Williamson | QLIST_INIT(&group->device_list); |
2751 | 65501a74 | Alex Williamson | |
2752 | 65501a74 | Alex Williamson | if (vfio_connect_container(group)) {
|
2753 | 312fd5f2 | Markus Armbruster | error_report("vfio: failed to setup container for group %d", groupid);
|
2754 | 65501a74 | Alex Williamson | close(group->fd); |
2755 | 65501a74 | Alex Williamson | g_free(group); |
2756 | 65501a74 | Alex Williamson | return NULL; |
2757 | 65501a74 | Alex Williamson | } |
2758 | 65501a74 | Alex Williamson | |
2759 | 65501a74 | Alex Williamson | QLIST_INSERT_HEAD(&group_list, group, next); |
2760 | 65501a74 | Alex Williamson | |
2761 | 65501a74 | Alex Williamson | return group;
|
2762 | 65501a74 | Alex Williamson | } |
2763 | 65501a74 | Alex Williamson | |
2764 | 65501a74 | Alex Williamson | static void vfio_put_group(VFIOGroup *group) |
2765 | 65501a74 | Alex Williamson | { |
2766 | 65501a74 | Alex Williamson | if (!QLIST_EMPTY(&group->device_list)) {
|
2767 | 65501a74 | Alex Williamson | return;
|
2768 | 65501a74 | Alex Williamson | } |
2769 | 65501a74 | Alex Williamson | |
2770 | 65501a74 | Alex Williamson | vfio_disconnect_container(group); |
2771 | 65501a74 | Alex Williamson | QLIST_REMOVE(group, next); |
2772 | 65501a74 | Alex Williamson | DPRINTF("vfio_put_group: close group->fd\n");
|
2773 | 65501a74 | Alex Williamson | close(group->fd); |
2774 | 65501a74 | Alex Williamson | g_free(group); |
2775 | 65501a74 | Alex Williamson | } |
2776 | 65501a74 | Alex Williamson | |
2777 | 65501a74 | Alex Williamson | static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev) |
2778 | 65501a74 | Alex Williamson | { |
2779 | 65501a74 | Alex Williamson | struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; |
2780 | 65501a74 | Alex Williamson | struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; |
2781 | 65501a74 | Alex Williamson | int ret, i;
|
2782 | 65501a74 | Alex Williamson | |
2783 | 65501a74 | Alex Williamson | ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); |
2784 | 65501a74 | Alex Williamson | if (ret < 0) { |
2785 | 1a9522cc | Markus Armbruster | error_report("vfio: error getting device %s from group %d: %m",
|
2786 | 65501a74 | Alex Williamson | name, group->groupid); |
2787 | 1a9522cc | Markus Armbruster | error_printf("Verify all devices in group %d are bound to vfio-pci "
|
2788 | 65501a74 | Alex Williamson | "or pci-stub and not already in use\n", group->groupid);
|
2789 | 65501a74 | Alex Williamson | return ret;
|
2790 | 65501a74 | Alex Williamson | } |
2791 | 65501a74 | Alex Williamson | |
2792 | 65501a74 | Alex Williamson | vdev->fd = ret; |
2793 | 65501a74 | Alex Williamson | vdev->group = group; |
2794 | 65501a74 | Alex Williamson | QLIST_INSERT_HEAD(&group->device_list, vdev, next); |
2795 | 65501a74 | Alex Williamson | |
2796 | 65501a74 | Alex Williamson | /* Sanity check device */
|
2797 | 65501a74 | Alex Williamson | ret = ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &dev_info); |
2798 | 65501a74 | Alex Williamson | if (ret) {
|
2799 | 312fd5f2 | Markus Armbruster | error_report("vfio: error getting device info: %m");
|
2800 | 65501a74 | Alex Williamson | goto error;
|
2801 | 65501a74 | Alex Williamson | } |
2802 | 65501a74 | Alex Williamson | |
2803 | 65501a74 | Alex Williamson | DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
|
2804 | 65501a74 | Alex Williamson | dev_info.flags, dev_info.num_regions, dev_info.num_irqs); |
2805 | 65501a74 | Alex Williamson | |
2806 | 65501a74 | Alex Williamson | if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
|
2807 | 312fd5f2 | Markus Armbruster | error_report("vfio: Um, this isn't a PCI device");
|
2808 | 65501a74 | Alex Williamson | goto error;
|
2809 | 65501a74 | Alex Williamson | } |
2810 | 65501a74 | Alex Williamson | |
2811 | 65501a74 | Alex Williamson | vdev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); |
2812 | 65501a74 | Alex Williamson | if (!vdev->reset_works) {
|
2813 | 312fd5f2 | Markus Armbruster | error_report("Warning, device %s does not support reset", name);
|
2814 | 65501a74 | Alex Williamson | } |
2815 | 65501a74 | Alex Williamson | |
2816 | 8fc94e5a | Alex Williamson | if (dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { |
2817 | 312fd5f2 | Markus Armbruster | error_report("vfio: unexpected number of io regions %u",
|
2818 | 65501a74 | Alex Williamson | dev_info.num_regions); |
2819 | 65501a74 | Alex Williamson | goto error;
|
2820 | 65501a74 | Alex Williamson | } |
2821 | 65501a74 | Alex Williamson | |
2822 | 8fc94e5a | Alex Williamson | if (dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { |
2823 | 312fd5f2 | Markus Armbruster | error_report("vfio: unexpected number of irqs %u", dev_info.num_irqs);
|
2824 | 65501a74 | Alex Williamson | goto error;
|
2825 | 65501a74 | Alex Williamson | } |
2826 | 65501a74 | Alex Williamson | |
2827 | 65501a74 | Alex Williamson | for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
|
2828 | 65501a74 | Alex Williamson | reg_info.index = i; |
2829 | 65501a74 | Alex Williamson | |
2830 | 65501a74 | Alex Williamson | ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); |
2831 | 65501a74 | Alex Williamson | if (ret) {
|
2832 | 312fd5f2 | Markus Armbruster | error_report("vfio: Error getting region %d info: %m", i);
|
2833 | 65501a74 | Alex Williamson | goto error;
|
2834 | 65501a74 | Alex Williamson | } |
2835 | 65501a74 | Alex Williamson | |
2836 | 65501a74 | Alex Williamson | DPRINTF("Device %s region %d:\n", name, i);
|
2837 | 65501a74 | Alex Williamson | DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
|
2838 | 65501a74 | Alex Williamson | (unsigned long)reg_info.size, (unsigned long)reg_info.offset, |
2839 | 65501a74 | Alex Williamson | (unsigned long)reg_info.flags); |
2840 | 65501a74 | Alex Williamson | |
2841 | 65501a74 | Alex Williamson | vdev->bars[i].flags = reg_info.flags; |
2842 | 65501a74 | Alex Williamson | vdev->bars[i].size = reg_info.size; |
2843 | 65501a74 | Alex Williamson | vdev->bars[i].fd_offset = reg_info.offset; |
2844 | 65501a74 | Alex Williamson | vdev->bars[i].fd = vdev->fd; |
2845 | 65501a74 | Alex Williamson | vdev->bars[i].nr = i; |
2846 | 7076eabc | Alex Williamson | QLIST_INIT(&vdev->bars[i].quirks); |
2847 | 65501a74 | Alex Williamson | } |
2848 | 65501a74 | Alex Williamson | |
2849 | 65501a74 | Alex Williamson | reg_info.index = VFIO_PCI_ROM_REGION_INDEX; |
2850 | 65501a74 | Alex Williamson | |
2851 | 65501a74 | Alex Williamson | ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); |
2852 | 65501a74 | Alex Williamson | if (ret) {
|
2853 | 312fd5f2 | Markus Armbruster | error_report("vfio: Error getting ROM info: %m");
|
2854 | 65501a74 | Alex Williamson | goto error;
|
2855 | 65501a74 | Alex Williamson | } |
2856 | 65501a74 | Alex Williamson | |
2857 | 65501a74 | Alex Williamson | DPRINTF("Device %s ROM:\n", name);
|
2858 | 65501a74 | Alex Williamson | DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
|
2859 | 65501a74 | Alex Williamson | (unsigned long)reg_info.size, (unsigned long)reg_info.offset, |
2860 | 65501a74 | Alex Williamson | (unsigned long)reg_info.flags); |
2861 | 65501a74 | Alex Williamson | |
2862 | 65501a74 | Alex Williamson | vdev->rom_size = reg_info.size; |
2863 | 65501a74 | Alex Williamson | vdev->rom_offset = reg_info.offset; |
2864 | 65501a74 | Alex Williamson | |
2865 | 65501a74 | Alex Williamson | reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX; |
2866 | 65501a74 | Alex Williamson | |
2867 | 65501a74 | Alex Williamson | ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); |
2868 | 65501a74 | Alex Williamson | if (ret) {
|
2869 | 312fd5f2 | Markus Armbruster | error_report("vfio: Error getting config info: %m");
|
2870 | 65501a74 | Alex Williamson | goto error;
|
2871 | 65501a74 | Alex Williamson | } |
2872 | 65501a74 | Alex Williamson | |
2873 | 65501a74 | Alex Williamson | DPRINTF("Device %s config:\n", name);
|
2874 | 65501a74 | Alex Williamson | DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
|
2875 | 65501a74 | Alex Williamson | (unsigned long)reg_info.size, (unsigned long)reg_info.offset, |
2876 | 65501a74 | Alex Williamson | (unsigned long)reg_info.flags); |
2877 | 65501a74 | Alex Williamson | |
2878 | 65501a74 | Alex Williamson | vdev->config_size = reg_info.size; |
2879 | 6a659bbf | Alex Williamson | if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) {
|
2880 | 6a659bbf | Alex Williamson | vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS; |
2881 | 6a659bbf | Alex Williamson | } |
2882 | 65501a74 | Alex Williamson | vdev->config_offset = reg_info.offset; |
2883 | 65501a74 | Alex Williamson | |
2884 | f15689c7 | Alex Williamson | if ((vdev->features & VFIO_FEATURE_ENABLE_VGA) &&
|
2885 | f15689c7 | Alex Williamson | dev_info.num_regions > VFIO_PCI_VGA_REGION_INDEX) { |
2886 | f15689c7 | Alex Williamson | struct vfio_region_info vga_info = {
|
2887 | f15689c7 | Alex Williamson | .argsz = sizeof(vga_info),
|
2888 | f15689c7 | Alex Williamson | .index = VFIO_PCI_VGA_REGION_INDEX, |
2889 | f15689c7 | Alex Williamson | }; |
2890 | f15689c7 | Alex Williamson | |
2891 | f15689c7 | Alex Williamson | ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &vga_info); |
2892 | f15689c7 | Alex Williamson | if (ret) {
|
2893 | f15689c7 | Alex Williamson | error_report( |
2894 | f15689c7 | Alex Williamson | "vfio: Device does not support requested feature x-vga");
|
2895 | f15689c7 | Alex Williamson | goto error;
|
2896 | f15689c7 | Alex Williamson | } |
2897 | f15689c7 | Alex Williamson | |
2898 | f15689c7 | Alex Williamson | if (!(vga_info.flags & VFIO_REGION_INFO_FLAG_READ) ||
|
2899 | f15689c7 | Alex Williamson | !(vga_info.flags & VFIO_REGION_INFO_FLAG_WRITE) || |
2900 | f15689c7 | Alex Williamson | vga_info.size < 0xbffff + 1) { |
2901 | f15689c7 | Alex Williamson | error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
|
2902 | f15689c7 | Alex Williamson | (unsigned long)vga_info.flags, |
2903 | f15689c7 | Alex Williamson | (unsigned long)vga_info.size); |
2904 | f15689c7 | Alex Williamson | goto error;
|
2905 | f15689c7 | Alex Williamson | } |
2906 | f15689c7 | Alex Williamson | |
2907 | f15689c7 | Alex Williamson | vdev->vga.fd_offset = vga_info.offset; |
2908 | f15689c7 | Alex Williamson | vdev->vga.fd = vdev->fd; |
2909 | f15689c7 | Alex Williamson | |
2910 | f15689c7 | Alex Williamson | vdev->vga.region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE; |
2911 | f15689c7 | Alex Williamson | vdev->vga.region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM; |
2912 | 7076eabc | Alex Williamson | QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_MEM].quirks); |
2913 | f15689c7 | Alex Williamson | |
2914 | f15689c7 | Alex Williamson | vdev->vga.region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE; |
2915 | f15689c7 | Alex Williamson | vdev->vga.region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO; |
2916 | 7076eabc | Alex Williamson | QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_LO].quirks); |
2917 | f15689c7 | Alex Williamson | |
2918 | f15689c7 | Alex Williamson | vdev->vga.region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE; |
2919 | f15689c7 | Alex Williamson | vdev->vga.region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI; |
2920 | 7076eabc | Alex Williamson | QLIST_INIT(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks); |
2921 | f15689c7 | Alex Williamson | |
2922 | f15689c7 | Alex Williamson | vdev->has_vga = true;
|
2923 | f15689c7 | Alex Williamson | } |
2924 | f15689c7 | Alex Williamson | |
2925 | 65501a74 | Alex Williamson | error:
|
2926 | 65501a74 | Alex Williamson | if (ret) {
|
2927 | 65501a74 | Alex Williamson | QLIST_REMOVE(vdev, next); |
2928 | 65501a74 | Alex Williamson | vdev->group = NULL;
|
2929 | 65501a74 | Alex Williamson | close(vdev->fd); |
2930 | 65501a74 | Alex Williamson | } |
2931 | 65501a74 | Alex Williamson | return ret;
|
2932 | 65501a74 | Alex Williamson | } |
2933 | 65501a74 | Alex Williamson | |
2934 | 65501a74 | Alex Williamson | static void vfio_put_device(VFIODevice *vdev) |
2935 | 65501a74 | Alex Williamson | { |
2936 | 65501a74 | Alex Williamson | QLIST_REMOVE(vdev, next); |
2937 | 65501a74 | Alex Williamson | vdev->group = NULL;
|
2938 | 65501a74 | Alex Williamson | DPRINTF("vfio_put_device: close vdev->fd\n");
|
2939 | 65501a74 | Alex Williamson | close(vdev->fd); |
2940 | 65501a74 | Alex Williamson | if (vdev->msix) {
|
2941 | 65501a74 | Alex Williamson | g_free(vdev->msix); |
2942 | 65501a74 | Alex Williamson | vdev->msix = NULL;
|
2943 | 65501a74 | Alex Williamson | } |
2944 | 65501a74 | Alex Williamson | } |
2945 | 65501a74 | Alex Williamson | |
2946 | 65501a74 | Alex Williamson | static int vfio_initfn(PCIDevice *pdev) |
2947 | 65501a74 | Alex Williamson | { |
2948 | 65501a74 | Alex Williamson | VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev); |
2949 | 65501a74 | Alex Williamson | VFIOGroup *group; |
2950 | 65501a74 | Alex Williamson | char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
|
2951 | 65501a74 | Alex Williamson | ssize_t len; |
2952 | 65501a74 | Alex Williamson | struct stat st;
|
2953 | 65501a74 | Alex Williamson | int groupid;
|
2954 | 65501a74 | Alex Williamson | int ret;
|
2955 | 65501a74 | Alex Williamson | |
2956 | 65501a74 | Alex Williamson | /* Check that the host device exists */
|
2957 | 65501a74 | Alex Williamson | snprintf(path, sizeof(path),
|
2958 | 65501a74 | Alex Williamson | "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
|
2959 | 65501a74 | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
2960 | 65501a74 | Alex Williamson | vdev->host.function); |
2961 | 65501a74 | Alex Williamson | if (stat(path, &st) < 0) { |
2962 | 312fd5f2 | Markus Armbruster | error_report("vfio: error: no such host device: %s", path);
|
2963 | 65501a74 | Alex Williamson | return -errno;
|
2964 | 65501a74 | Alex Williamson | } |
2965 | 65501a74 | Alex Williamson | |
2966 | 65501a74 | Alex Williamson | strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1); |
2967 | 65501a74 | Alex Williamson | |
2968 | 65501a74 | Alex Williamson | len = readlink(path, iommu_group_path, PATH_MAX); |
2969 | 65501a74 | Alex Williamson | if (len <= 0) { |
2970 | 312fd5f2 | Markus Armbruster | error_report("vfio: error no iommu_group for device");
|
2971 | 65501a74 | Alex Williamson | return -errno;
|
2972 | 65501a74 | Alex Williamson | } |
2973 | 65501a74 | Alex Williamson | |
2974 | 65501a74 | Alex Williamson | iommu_group_path[len] = 0;
|
2975 | 65501a74 | Alex Williamson | group_name = basename(iommu_group_path); |
2976 | 65501a74 | Alex Williamson | |
2977 | 65501a74 | Alex Williamson | if (sscanf(group_name, "%d", &groupid) != 1) { |
2978 | 312fd5f2 | Markus Armbruster | error_report("vfio: error reading %s: %m", path);
|
2979 | 65501a74 | Alex Williamson | return -errno;
|
2980 | 65501a74 | Alex Williamson | } |
2981 | 65501a74 | Alex Williamson | |
2982 | 65501a74 | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
|
2983 | 65501a74 | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function, groupid); |
2984 | 65501a74 | Alex Williamson | |
2985 | 65501a74 | Alex Williamson | group = vfio_get_group(groupid); |
2986 | 65501a74 | Alex Williamson | if (!group) {
|
2987 | 312fd5f2 | Markus Armbruster | error_report("vfio: failed to get group %d", groupid);
|
2988 | 65501a74 | Alex Williamson | return -ENOENT;
|
2989 | 65501a74 | Alex Williamson | } |
2990 | 65501a74 | Alex Williamson | |
2991 | 65501a74 | Alex Williamson | snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x", |
2992 | 65501a74 | Alex Williamson | vdev->host.domain, vdev->host.bus, vdev->host.slot, |
2993 | 65501a74 | Alex Williamson | vdev->host.function); |
2994 | 65501a74 | Alex Williamson | |
2995 | 65501a74 | Alex Williamson | QLIST_FOREACH(pvdev, &group->device_list, next) { |
2996 | 65501a74 | Alex Williamson | if (pvdev->host.domain == vdev->host.domain &&
|
2997 | 65501a74 | Alex Williamson | pvdev->host.bus == vdev->host.bus && |
2998 | 65501a74 | Alex Williamson | pvdev->host.slot == vdev->host.slot && |
2999 | 65501a74 | Alex Williamson | pvdev->host.function == vdev->host.function) { |
3000 | 65501a74 | Alex Williamson | |
3001 | 312fd5f2 | Markus Armbruster | error_report("vfio: error: device %s is already attached", path);
|
3002 | 65501a74 | Alex Williamson | vfio_put_group(group); |
3003 | 65501a74 | Alex Williamson | return -EBUSY;
|
3004 | 65501a74 | Alex Williamson | } |
3005 | 65501a74 | Alex Williamson | } |
3006 | 65501a74 | Alex Williamson | |
3007 | 65501a74 | Alex Williamson | ret = vfio_get_device(group, path, vdev); |
3008 | 65501a74 | Alex Williamson | if (ret) {
|
3009 | 312fd5f2 | Markus Armbruster | error_report("vfio: failed to get device %s", path);
|
3010 | 65501a74 | Alex Williamson | vfio_put_group(group); |
3011 | 65501a74 | Alex Williamson | return ret;
|
3012 | 65501a74 | Alex Williamson | } |
3013 | 65501a74 | Alex Williamson | |
3014 | 65501a74 | Alex Williamson | /* Get a copy of config space */
|
3015 | 65501a74 | Alex Williamson | ret = pread(vdev->fd, vdev->pdev.config, |
3016 | 65501a74 | Alex Williamson | MIN(pci_config_size(&vdev->pdev), vdev->config_size), |
3017 | 65501a74 | Alex Williamson | vdev->config_offset); |
3018 | 65501a74 | Alex Williamson | if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { |
3019 | 65501a74 | Alex Williamson | ret = ret < 0 ? -errno : -EFAULT;
|
3020 | 312fd5f2 | Markus Armbruster | error_report("vfio: Failed to read device config space");
|
3021 | 65501a74 | Alex Williamson | goto out_put;
|
3022 | 65501a74 | Alex Williamson | } |
3023 | 65501a74 | Alex Williamson | |
3024 | 4b5d5e87 | Alex Williamson | /* vfio emulates a lot for us, but some bits need extra love */
|
3025 | 4b5d5e87 | Alex Williamson | vdev->emulated_config_bits = g_malloc0(vdev->config_size); |
3026 | 4b5d5e87 | Alex Williamson | |
3027 | 4b5d5e87 | Alex Williamson | /* QEMU can choose to expose the ROM or not */
|
3028 | 4b5d5e87 | Alex Williamson | memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); |
3029 | 4b5d5e87 | Alex Williamson | |
3030 | 4b5d5e87 | Alex Williamson | /* QEMU can change multi-function devices to single function, or reverse */
|
3031 | 4b5d5e87 | Alex Williamson | vdev->emulated_config_bits[PCI_HEADER_TYPE] = |
3032 | 4b5d5e87 | Alex Williamson | PCI_HEADER_TYPE_MULTI_FUNCTION; |
3033 | 4b5d5e87 | Alex Williamson | |
3034 | 65501a74 | Alex Williamson | /*
|
3035 | 65501a74 | Alex Williamson | * Clear host resource mapping info. If we choose not to register a
|
3036 | 65501a74 | Alex Williamson | * BAR, such as might be the case with the option ROM, we can get
|
3037 | 65501a74 | Alex Williamson | * confusing, unwritable, residual addresses from the host here.
|
3038 | 65501a74 | Alex Williamson | */
|
3039 | 65501a74 | Alex Williamson | memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24); |
3040 | 65501a74 | Alex Williamson | memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4); |
3041 | 65501a74 | Alex Williamson | |
3042 | 65501a74 | Alex Williamson | vfio_load_rom(vdev); |
3043 | 65501a74 | Alex Williamson | |
3044 | 65501a74 | Alex Williamson | ret = vfio_early_setup_msix(vdev); |
3045 | 65501a74 | Alex Williamson | if (ret) {
|
3046 | 65501a74 | Alex Williamson | goto out_put;
|
3047 | 65501a74 | Alex Williamson | } |
3048 | 65501a74 | Alex Williamson | |
3049 | 65501a74 | Alex Williamson | vfio_map_bars(vdev); |
3050 | 65501a74 | Alex Williamson | |
3051 | 65501a74 | Alex Williamson | ret = vfio_add_capabilities(vdev); |
3052 | 65501a74 | Alex Williamson | if (ret) {
|
3053 | 65501a74 | Alex Williamson | goto out_teardown;
|
3054 | 65501a74 | Alex Williamson | } |
3055 | 65501a74 | Alex Williamson | |
3056 | 4b5d5e87 | Alex Williamson | /* QEMU emulates all of MSI & MSIX */
|
3057 | 4b5d5e87 | Alex Williamson | if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
|
3058 | 4b5d5e87 | Alex Williamson | memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
|
3059 | 4b5d5e87 | Alex Williamson | MSIX_CAP_LENGTH); |
3060 | 4b5d5e87 | Alex Williamson | } |
3061 | 4b5d5e87 | Alex Williamson | |
3062 | 4b5d5e87 | Alex Williamson | if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
|
3063 | 4b5d5e87 | Alex Williamson | memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
|
3064 | 4b5d5e87 | Alex Williamson | vdev->msi_cap_size); |
3065 | 4b5d5e87 | Alex Williamson | } |
3066 | 4b5d5e87 | Alex Williamson | |
3067 | 65501a74 | Alex Williamson | if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { |
3068 | ea486926 | Alex Williamson | vdev->intx.mmap_timer = qemu_new_timer_ms(vm_clock, |
3069 | ea486926 | Alex Williamson | vfio_intx_mmap_enable, vdev); |
3070 | e1d1e586 | Alex Williamson | pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_update_irq); |
3071 | 65501a74 | Alex Williamson | ret = vfio_enable_intx(vdev); |
3072 | 65501a74 | Alex Williamson | if (ret) {
|
3073 | 65501a74 | Alex Williamson | goto out_teardown;
|
3074 | 65501a74 | Alex Williamson | } |
3075 | 65501a74 | Alex Williamson | } |
3076 | 65501a74 | Alex Williamson | |
3077 | c29029dd | Alex Williamson | add_boot_device_path(vdev->bootindex, &pdev->qdev, NULL);
|
3078 | c29029dd | Alex Williamson | |
3079 | 65501a74 | Alex Williamson | return 0; |
3080 | 65501a74 | Alex Williamson | |
3081 | 65501a74 | Alex Williamson | out_teardown:
|
3082 | 65501a74 | Alex Williamson | pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
|
3083 | 65501a74 | Alex Williamson | vfio_teardown_msi(vdev); |
3084 | 65501a74 | Alex Williamson | vfio_unmap_bars(vdev); |
3085 | 65501a74 | Alex Williamson | out_put:
|
3086 | 4b5d5e87 | Alex Williamson | g_free(vdev->emulated_config_bits); |
3087 | 65501a74 | Alex Williamson | vfio_put_device(vdev); |
3088 | 65501a74 | Alex Williamson | vfio_put_group(group); |
3089 | 65501a74 | Alex Williamson | return ret;
|
3090 | 65501a74 | Alex Williamson | } |
3091 | 65501a74 | Alex Williamson | |
3092 | 65501a74 | Alex Williamson | static void vfio_exitfn(PCIDevice *pdev) |
3093 | 65501a74 | Alex Williamson | { |
3094 | 65501a74 | Alex Williamson | VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); |
3095 | 65501a74 | Alex Williamson | VFIOGroup *group = vdev->group; |
3096 | 65501a74 | Alex Williamson | |
3097 | 65501a74 | Alex Williamson | pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
|
3098 | 65501a74 | Alex Williamson | vfio_disable_interrupts(vdev); |
3099 | ea486926 | Alex Williamson | if (vdev->intx.mmap_timer) {
|
3100 | ea486926 | Alex Williamson | qemu_free_timer(vdev->intx.mmap_timer); |
3101 | ea486926 | Alex Williamson | } |
3102 | 65501a74 | Alex Williamson | vfio_teardown_msi(vdev); |
3103 | 65501a74 | Alex Williamson | vfio_unmap_bars(vdev); |
3104 | 4b5d5e87 | Alex Williamson | g_free(vdev->emulated_config_bits); |
3105 | 65501a74 | Alex Williamson | vfio_put_device(vdev); |
3106 | 65501a74 | Alex Williamson | vfio_put_group(group); |
3107 | 65501a74 | Alex Williamson | } |
3108 | 65501a74 | Alex Williamson | |
3109 | 65501a74 | Alex Williamson | static void vfio_pci_reset(DeviceState *dev) |
3110 | 65501a74 | Alex Williamson | { |
3111 | 65501a74 | Alex Williamson | PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev); |
3112 | 65501a74 | Alex Williamson | VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); |
3113 | 5834a83f | Alex Williamson | uint16_t cmd; |
3114 | 65501a74 | Alex Williamson | |
3115 | 5834a83f | Alex Williamson | DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
|
3116 | 5834a83f | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
3117 | 5834a83f | Alex Williamson | |
3118 | 5834a83f | Alex Williamson | vfio_disable_interrupts(vdev); |
3119 | 65501a74 | Alex Williamson | |
3120 | ba661818 | Alex Williamson | /* Make sure the device is in D0 */
|
3121 | ba661818 | Alex Williamson | if (vdev->pm_cap) {
|
3122 | ba661818 | Alex Williamson | uint16_t pmcsr; |
3123 | ba661818 | Alex Williamson | uint8_t state; |
3124 | ba661818 | Alex Williamson | |
3125 | ba661818 | Alex Williamson | pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
|
3126 | ba661818 | Alex Williamson | state = pmcsr & PCI_PM_CTRL_STATE_MASK; |
3127 | ba661818 | Alex Williamson | if (state) {
|
3128 | ba661818 | Alex Williamson | pmcsr &= ~PCI_PM_CTRL_STATE_MASK; |
3129 | ba661818 | Alex Williamson | vfio_pci_write_config(pdev, vdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
|
3130 | ba661818 | Alex Williamson | /* vfio handles the necessary delay here */
|
3131 | ba661818 | Alex Williamson | pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
|
3132 | ba661818 | Alex Williamson | state = pmcsr & PCI_PM_CTRL_STATE_MASK; |
3133 | ba661818 | Alex Williamson | if (state) {
|
3134 | ba661818 | Alex Williamson | error_report("vfio: Unable to power on device, stuck in D%d\n",
|
3135 | ba661818 | Alex Williamson | state); |
3136 | ba661818 | Alex Williamson | } |
3137 | ba661818 | Alex Williamson | } |
3138 | ba661818 | Alex Williamson | } |
3139 | ba661818 | Alex Williamson | |
3140 | 5834a83f | Alex Williamson | /*
|
3141 | 5834a83f | Alex Williamson | * Stop any ongoing DMA by disconecting I/O, MMIO, and bus master.
|
3142 | 5834a83f | Alex Williamson | * Also put INTx Disable in known state.
|
3143 | 5834a83f | Alex Williamson | */
|
3144 | 5834a83f | Alex Williamson | cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
|
3145 | 5834a83f | Alex Williamson | cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | |
3146 | 5834a83f | Alex Williamson | PCI_COMMAND_INTX_DISABLE); |
3147 | 5834a83f | Alex Williamson | vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
|
3148 | 5834a83f | Alex Williamson | |
3149 | 5834a83f | Alex Williamson | if (vdev->reset_works) {
|
3150 | 5834a83f | Alex Williamson | if (ioctl(vdev->fd, VFIO_DEVICE_RESET)) {
|
3151 | 5834a83f | Alex Williamson | error_report("vfio: Error unable to reset physical device "
|
3152 | 312fd5f2 | Markus Armbruster | "(%04x:%02x:%02x.%x): %m", vdev->host.domain,
|
3153 | 5834a83f | Alex Williamson | vdev->host.bus, vdev->host.slot, vdev->host.function); |
3154 | 5834a83f | Alex Williamson | } |
3155 | 65501a74 | Alex Williamson | } |
3156 | 5834a83f | Alex Williamson | |
3157 | 5834a83f | Alex Williamson | vfio_enable_intx(vdev); |
3158 | 65501a74 | Alex Williamson | } |
3159 | 65501a74 | Alex Williamson | |
/* qdev properties for the "vfio-pci" device */
static Property vfio_pci_dev_properties[] = {
    /* Host PCI address of the device to assign: "domain:bus:slot.function" */
    DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
    /*
     * Experimental ("x-") knob: timeout in ms for the intx.mmap_timer
     * (fires vfio_intx_mmap_enable — see vfio_initfn); default 1100.
     */
    DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIODevice,
                       intx.mmap_timeout, 1100),
    /* Experimental ("x-") feature bit enabling VGA support, off by default */
    DEFINE_PROP_BIT("x-vga", VFIODevice, features,
                    VFIO_FEATURE_ENABLE_VGA_BIT, false),
    /* Boot ordering index passed to add_boot_device_path(); -1 = unset */
    DEFINE_PROP_INT32("bootindex", VFIODevice, bootindex, -1),
    /*
     * TODO - support passed fds... is this necessary?
     * DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
     * DEFINE_PROP_STRING("vfiogroupfd", VFIODevice, vfiogroupfd_name),
     */
    DEFINE_PROP_END_OF_LIST(),
};
3174 | 65501a74 | Alex Williamson | |
/*
 * Assigned devices cannot be migrated; registering an unmigratable
 * vmstate makes the migration code refuse to save/load a VM while a
 * vfio-pci device is present.
 */
static const VMStateDescription vfio_pci_vmstate = {
    .name = "vfio-pci",
    .unmigratable = 1,
};
3179 | 65501a74 | Alex Williamson | |
3180 | 65501a74 | Alex Williamson | static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) |
3181 | 65501a74 | Alex Williamson | { |
3182 | 65501a74 | Alex Williamson | DeviceClass *dc = DEVICE_CLASS(klass); |
3183 | 65501a74 | Alex Williamson | PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); |
3184 | 65501a74 | Alex Williamson | |
3185 | 65501a74 | Alex Williamson | dc->reset = vfio_pci_reset; |
3186 | 65501a74 | Alex Williamson | dc->props = vfio_pci_dev_properties; |
3187 | d9f0e638 | Alex Williamson | dc->vmsd = &vfio_pci_vmstate; |
3188 | d9f0e638 | Alex Williamson | dc->desc = "VFIO-based PCI device assignment";
|
3189 | 65501a74 | Alex Williamson | pdc->init = vfio_initfn; |
3190 | 65501a74 | Alex Williamson | pdc->exit = vfio_exitfn; |
3191 | 65501a74 | Alex Williamson | pdc->config_read = vfio_pci_read_config; |
3192 | 65501a74 | Alex Williamson | pdc->config_write = vfio_pci_write_config; |
3193 | 6a659bbf | Alex Williamson | pdc->is_express = 1; /* We might be */ |
3194 | 65501a74 | Alex Williamson | } |
3195 | 65501a74 | Alex Williamson | |
/* QOM type description: makes the device available as "-device vfio-pci" */
static const TypeInfo vfio_pci_dev_info = {
    .name = "vfio-pci",
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(VFIODevice),
    .class_init = vfio_pci_dev_class_init,
};
3202 | 65501a74 | Alex Williamson | |
/* Register the vfio-pci type with QOM during module init (type_init) */
static void register_vfio_pci_dev_type(void)
{
    type_register_static(&vfio_pci_dev_info);
}

type_init(register_vfio_pci_dev_type)