Statistics
| Branch: | Revision:

root / hw / msix.c @ 7c9958b0

History | View | Annotate | Download (14.8 kB)

1
/*
2
 * MSI-X device support
3
 *
4
 * This module includes support for MSI-X in pci devices.
5
 *
6
 * Author: Michael S. Tsirkin <mst@redhat.com>
7
 *
8
 *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
9
 *
10
 * This work is licensed under the terms of the GNU GPL, version 2.  See
11
 * the COPYING file in the top-level directory.
12
 *
13
 * Contributions after 2012-01-13 are licensed under the terms of the
14
 * GNU GPL, version 2 or (at your option) any later version.
15
 */
16

    
17
#include "hw.h"
18
#include "msi.h"
19
#include "msix.h"
20
#include "pci.h"
21
#include "range.h"
22

    
23
#define MSIX_CAP_LENGTH 12
24

    
25
/* MSI-X enable bit and maskall bit are in byte 1 of the FLAGS register */
26
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
27
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
28
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
29

    
30
/* How much space does an MSIX table need. */
31
/* The spec requires giving the table structure
32
 * a 4K aligned region all by itself. */
33
#define MSIX_PAGE_SIZE 0x1000
34
/* Reserve second half of the page for pending bits */
35
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
36
#define MSIX_MAX_ENTRIES 32
37

    
38
/* Fetch the address/data pair stored in the MSI-X table entry of @vector. */
static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
    uint8_t *entry = &dev->msix_table_page[vector * PCI_MSIX_ENTRY_SIZE];
    MSIMessage message;

    message.data = pci_get_long(entry + PCI_MSIX_ENTRY_DATA);
    message.address = pci_get_quad(entry + PCI_MSIX_ENTRY_LOWER_ADDR);
    return message;
}
47

    
48
/* Add MSI-X capability to the config space for the device. */
49
/* Given a bar and its size, add MSI-X table on top of it
50
 * and fill MSI-X capability in the config space.
51
 * Original bar size must be a power of 2 or 0.
52
 * New bar size is returned. */
53
static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
54
                           unsigned bar_nr, unsigned bar_size)
55
{
56
    int config_offset;
57
    uint8_t *config;
58
    uint32_t new_size;
59

    
60
    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
61
        return -EINVAL;
62
    if (bar_size > 0x80000000)
63
        return -ENOSPC;
64

    
65
    /* Add space for MSI-X structures */
66
    if (!bar_size) {
67
        new_size = MSIX_PAGE_SIZE;
68
    } else if (bar_size < MSIX_PAGE_SIZE) {
69
        bar_size = MSIX_PAGE_SIZE;
70
        new_size = MSIX_PAGE_SIZE * 2;
71
    } else {
72
        new_size = bar_size * 2;
73
    }
74

    
75
    pdev->msix_bar_size = new_size;
76
    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
77
                                       0, MSIX_CAP_LENGTH);
78
    if (config_offset < 0)
79
        return config_offset;
80
    config = pdev->config + config_offset;
81

    
82
    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
83
    /* Table on top of BAR */
84
    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
85
    /* Pending bits on top of that */
86
    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
87
                 bar_nr);
88
    pdev->msix_cap = config_offset;
89
    /* Make flags bit writable. */
90
    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
91
            MSIX_MASKALL_MASK;
92
    pdev->msix_function_masked = true;
93
    return 0;
94
}
95

    
96
/* MMIO read callback for the MSI-X page: returns the 32-bit word that
 * contains @addr.  @size is not consulted. */
static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
                               unsigned size)
{
    PCIDevice *dev = opaque;
    /* Wrap into the page and round down to a 4-byte boundary. */
    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;

    return pci_get_long(dev->msix_table_page + offset);
}
105

    
106
/* Bit mask selecting @vector inside its pending-bits byte (bit vector % 8).
 * The arithmetic is done on unsigned values so that the shift count can
 * never be negative, whatever value the signed @vector holds. */
static uint8_t msix_pending_mask(int vector)
{
    return 1u << ((unsigned)vector % 8);
}
110

    
111
/* Address of the pending-bits byte that holds the bit for @vector. */
static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
{
    uint8_t *pending_bits = dev->msix_table_page + MSIX_PAGE_PENDING;

    return pending_bits + vector / 8;
}
115

    
116
/* Non-zero if the pending bit of @vector is set. */
static int msix_is_pending(PCIDevice *dev, int vector)
{
    uint8_t bits = *msix_pending_byte(dev, vector);

    return bits & msix_pending_mask(vector);
}
120

    
121
/* Set the pending bit of @vector. */
static void msix_set_pending(PCIDevice *dev, int vector)
{
    uint8_t *byte = msix_pending_byte(dev, vector);

    *byte |= msix_pending_mask(vector);
}
125

    
126
/* Clear the pending bit of @vector. */
static void msix_clr_pending(PCIDevice *dev, int vector)
{
    uint8_t *byte = msix_pending_byte(dev, vector);

    *byte &= ~msix_pending_mask(vector);
}
130

    
131
/* Would @vector be masked if the function-level mask were @fmask?
 * True when @fmask is set or the entry's own mask bit is set in the table. */
static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask)
{
    unsigned ctrl = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;

    if (fmask) {
        return true;
    }
    return (dev->msix_table_page[ctrl] & PCI_MSIX_ENTRY_CTRL_MASKBIT) != 0;
}
136

    
137
/* Is @vector masked right now, taking the cached function mask into account? */
static bool msix_is_masked(PCIDevice *dev, int vector)
{
    bool function_masked = dev->msix_function_masked;

    return msix_vector_masked(dev, vector, function_masked);
}
141

    
142
/* Tell the registered notifiers, if any, about the new mask state of @vector:
 * the release notifier when it became masked, the use notifier (with the
 * vector's current message) when it became unmasked.  A negative return from
 * the use notifier trips an assertion. */
static void msix_fire_vector_notifier(PCIDevice *dev,
                                      unsigned int vector, bool is_masked)
{
    if (!dev->msix_vector_use_notifier) {
        return;
    }

    if (!is_masked) {
        MSIMessage msg = msix_get_message(dev, vector);
        int ret = dev->msix_vector_use_notifier(dev, vector, msg);

        assert(ret >= 0);
        return;
    }

    dev->msix_vector_release_notifier(dev, vector);
}
159

    
160
/* React to a possible mask change of @vector.  @was_masked is the state before
 * the change; the current state is recomputed here.  On a transition the
 * notifiers are fired, and an interrupt left pending while masked is
 * delivered once the vector is unmasked. */
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
    bool now_masked = msix_is_masked(dev, vector);

    if (now_masked != was_masked) {
        msix_fire_vector_notifier(dev, vector, now_masked);

        if (!now_masked && msix_is_pending(dev, vector)) {
            msix_clr_pending(dev, vector);
            msix_notify(dev, vector);
        }
    }
}
175

    
176
/* Recompute the cached function mask from config space: the function is
 * masked when MSI-X is not enabled or when the maskall bit is set. */
static void msix_update_function_masked(PCIDevice *dev)
{
    if (!msix_enabled(dev)) {
        dev->msix_function_masked = true;
        return;
    }

    dev->msix_function_masked =
        (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
         MSIX_MASKALL_MASK) != 0;
}
181

    
182
/* Handle MSI-X capability config write. */
183
void msix_write_config(PCIDevice *dev, uint32_t addr,
184
                       uint32_t val, int len)
185
{
186
    unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
187
    int vector;
188
    bool was_masked;
189

    
190
    if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
191
        return;
192
    }
193

    
194
    was_masked = dev->msix_function_masked;
195
    msix_update_function_masked(dev);
196

    
197
    if (!msix_enabled(dev)) {
198
        return;
199
    }
200

    
201
    pci_device_deassert_intx(dev);
202

    
203
    if (dev->msix_function_masked == was_masked) {
204
        return;
205
    }
206

    
207
    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
208
        msix_handle_mask_update(dev, vector,
209
                                msix_vector_masked(dev, vector, was_masked));
210
    }
211
}
212

    
213
/* MMIO write callback for the MSI-X page.  The 32-bit word containing @addr
 * is updated and any resulting mask change of the affected vector is
 * processed.  @size is not consulted. */
static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
                            uint64_t val, unsigned size)
{
    PCIDevice *dev = opaque;
    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
    int vector = offset / PCI_MSIX_ENTRY_SIZE;

    /* Anything past the last table entry, which includes the read-only PBA
     * half of the page, is silently dropped. */
    if (vector < dev->msix_entries_nr) {
        bool was_masked = msix_is_masked(dev, vector);

        pci_set_long(dev->msix_table_page + offset, val);
        msix_handle_mask_update(dev, vector, was_masked);
    }
}
230

    
231
static const MemoryRegionOps msix_mmio_ops = {
232
    .read = msix_mmio_read,
233
    .write = msix_mmio_write,
234
    .endianness = DEVICE_NATIVE_ENDIAN,
235
    .valid = {
236
        .min_access_size = 4,
237
        .max_access_size = 4,
238
    },
239
};
240

    
241
/* Map the MSI-X page into @bar at the page-aligned table offset recorded in
 * the capability. */
static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
{
    uint32_t table_reg = pci_get_long(d->config + d->msix_cap +
                                      PCI_MSIX_TABLE);
    /* Masking to a page boundary also drops the BAR number in the low bits. */
    uint32_t page_offset = table_reg & ~(MSIX_PAGE_SIZE - 1);

    /* TODO: for assigned devices, we'll want to make it possible to map
     * pending bits separately in case they are in a separate bar. */
    memory_region_add_subregion(bar, page_offset, &d->msix_mmio);
}
251

    
252
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
253
{
254
    int vector;
255

    
256
    for (vector = 0; vector < nentries; ++vector) {
257
        unsigned offset =
258
            vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
259
        bool was_masked = msix_is_masked(dev, vector);
260

    
261
        dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
262
        msix_handle_mask_update(dev, vector, was_masked);
263
    }
264
}
265

    
266
/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
267
 * modified, it should be retrieved with msix_bar_size. */
268
int msix_init(struct PCIDevice *dev, unsigned short nentries,
269
              MemoryRegion *bar,
270
              unsigned bar_nr, unsigned bar_size)
271
{
272
    int ret;
273

    
274
    /* Nothing to do if MSI is not supported by interrupt controller */
275
    if (!msi_supported) {
276
        return -ENOTSUP;
277
    }
278
    if (nentries > MSIX_MAX_ENTRIES)
279
        return -EINVAL;
280

    
281
    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
282
                                        sizeof *dev->msix_entry_used);
283

    
284
    dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE);
285
    msix_mask_all(dev, nentries);
286

    
287
    memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev,
288
                          "msix", MSIX_PAGE_SIZE);
289

    
290
    dev->msix_entries_nr = nentries;
291
    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
292
    if (ret)
293
        goto err_config;
294

    
295
    dev->cap_present |= QEMU_PCI_CAP_MSIX;
296
    msix_mmio_setup(dev, bar);
297
    return 0;
298

    
299
err_config:
300
    dev->msix_entries_nr = 0;
301
    memory_region_destroy(&dev->msix_mmio);
302
    g_free(dev->msix_table_page);
303
    dev->msix_table_page = NULL;
304
    g_free(dev->msix_entry_used);
305
    dev->msix_entry_used = NULL;
306
    return ret;
307
}
308

    
309
/* Zero the use count and clear the pending bit of every vector. */
static void msix_free_irq_entries(PCIDevice *dev)
{
    int i = 0;

    while (i < dev->msix_entries_nr) {
        dev->msix_entry_used[i] = 0;
        msix_clr_pending(dev, i);
        ++i;
    }
}
318

    
319
/* Clean up resources for the device. */
320
int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
321
{
322
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
323
        return 0;
324
    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
325
    dev->msix_cap = 0;
326
    msix_free_irq_entries(dev);
327
    dev->msix_entries_nr = 0;
328
    memory_region_del_subregion(bar, &dev->msix_mmio);
329
    memory_region_destroy(&dev->msix_mmio);
330
    g_free(dev->msix_table_page);
331
    dev->msix_table_page = NULL;
332
    g_free(dev->msix_entry_used);
333
    dev->msix_entry_used = NULL;
334
    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
335
    return 0;
336
}
337

    
338
/* Write the MSI-X table entries followed by the pending bits to @f.
 * Nothing is written when the device has no MSI-X capability. */
void msix_save(PCIDevice *dev, QEMUFile *f)
{
    unsigned nr = dev->msix_entries_nr;
    unsigned table_bytes = nr * PCI_MSIX_ENTRY_SIZE;
    unsigned pending_bytes = (nr + 7) / 8;    /* one bit per vector */

    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
        return;
    }

    qemu_put_buffer(f, dev->msix_table_page, table_bytes);
    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
                    pending_bytes);
}
349

    
350
/* Should be called after restoring the config space. */
351
void msix_load(PCIDevice *dev, QEMUFile *f)
352
{
353
    unsigned n = dev->msix_entries_nr;
354
    unsigned int vector;
355

    
356
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
357
        return;
358
    }
359

    
360
    msix_free_irq_entries(dev);
361
    qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
362
    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
363
    msix_update_function_masked(dev);
364

    
365
    for (vector = 0; vector < n; vector++) {
366
        msix_handle_mask_update(dev, vector, true);
367
    }
368
}
369

    
370
/* Does device support MSI-X? */
371
int msix_present(PCIDevice *dev)
372
{
373
    return dev->cap_present & QEMU_PCI_CAP_MSIX;
374
}
375

    
376
/* Is MSI-X enabled? */
377
int msix_enabled(PCIDevice *dev)
378
{
379
    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
380
        (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
381
         MSIX_ENABLE_MASK);
382
}
383

    
384
/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
385
uint32_t msix_bar_size(PCIDevice *dev)
386
{
387
    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
388
        dev->msix_bar_size : 0;
389
}
390

    
391
/* Send an MSI-X message */
392
void msix_notify(PCIDevice *dev, unsigned vector)
393
{
394
    MSIMessage msg;
395

    
396
    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
397
        return;
398
    if (msix_is_masked(dev, vector)) {
399
        msix_set_pending(dev, vector);
400
        return;
401
    }
402

    
403
    msg = msix_get_message(dev, vector);
404

    
405
    stl_le_phys(msg.address, msg.data);
406
}
407

    
408
/* Reset the MSI-X state of @dev: zero all vector use counts and pending bits,
 * clear the guest-writable control bits (enable / maskall), wipe the table
 * page and leave every table entry masked.  Does nothing when the device has
 * no MSI-X capability. */
void msix_reset(PCIDevice *dev)
{
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
        return;
    }
    msix_free_irq_entries(dev);
    dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
            ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
    msix_mask_all(dev, dev->msix_entries_nr);
    /* The control bits were cleared without going through
     * msix_write_config(), so the cached function mask is stale.  Resync it
     * with config space; otherwise a table write made while MSI-X is disabled
     * could be mistaken for an unmask.  This is done last so that
     * msix_mask_all() above still compares against the pre-reset mask state
     * and fires the same notifiers as before. */
    msix_update_function_masked(dev);
}
418

    
419
/* PCI spec suggests that devices make it possible for software to configure
420
 * fewer vectors than supported by the device, but does not specify a standard
421
 * mechanism for devices to do so.
422
 *
423
 * We support this by asking devices to declare vectors software is going to
424
 * actually use, and checking this on the notification path. Devices that
425
 * don't want to follow the spec suggestion can declare all vectors as used. */
426

    
427
/* Mark vector as used. */
428
int msix_vector_use(PCIDevice *dev, unsigned vector)
429
{
430
    if (vector >= dev->msix_entries_nr)
431
        return -EINVAL;
432
    dev->msix_entry_used[vector]++;
433
    return 0;
434
}
435

    
436
/* Mark vector as unused. */
437
void msix_vector_unuse(PCIDevice *dev, unsigned vector)
438
{
439
    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
440
        return;
441
    }
442
    if (--dev->msix_entry_used[vector]) {
443
        return;
444
    }
445
    msix_clr_pending(dev, vector);
446
}
447

    
448
/* Mark every vector as unused and clear all pending bits; does nothing when
 * the device has no MSI-X capability. */
void msix_unuse_all_vectors(PCIDevice *dev)
{
    if (dev->cap_present & QEMU_PCI_CAP_MSIX) {
        msix_free_irq_entries(dev);
    }
}
454

    
455
unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
456
{
457
    return dev->msix_entries_nr;
458
}
459

    
460
/* Call the use notifier for @vector unless the vector is masked.
 * Returns 0 for a masked vector, otherwise the notifier's return value. */
static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (!msix_is_masked(dev, vector)) {
        MSIMessage msg = msix_get_message(dev, vector);

        return dev->msix_vector_use_notifier(dev, vector, msg);
    }
    return 0;
}
470

    
471
/* Call the release notifier for @vector unless the vector is masked. */
static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (!msix_is_masked(dev, vector)) {
        dev->msix_vector_release_notifier(dev, vector);
    }
}
478

    
479
int msix_set_vector_notifiers(PCIDevice *dev,
480
                              MSIVectorUseNotifier use_notifier,
481
                              MSIVectorReleaseNotifier release_notifier)
482
{
483
    int vector, ret;
484

    
485
    assert(use_notifier && release_notifier);
486

    
487
    dev->msix_vector_use_notifier = use_notifier;
488
    dev->msix_vector_release_notifier = release_notifier;
489

    
490
    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
491
        (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
492
        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
493
            ret = msix_set_notifier_for_vector(dev, vector);
494
            if (ret < 0) {
495
                goto undo;
496
            }
497
        }
498
    }
499
    return 0;
500

    
501
undo:
502
    while (--vector >= 0) {
503
        msix_unset_notifier_for_vector(dev, vector);
504
    }
505
    dev->msix_vector_use_notifier = NULL;
506
    dev->msix_vector_release_notifier = NULL;
507
    return ret;
508
}
509

    
510
/* Uninstall the notifier pair; both must currently be installed.
 *
 * If MSI-X is currently enabled and not function-masked, the release notifier
 * is invoked for each unmasked vector before the pair is removed. */
void msix_unset_vector_notifiers(PCIDevice *dev)
{
    uint8_t control;
    int vector;

    assert(dev->msix_vector_use_notifier &&
           dev->msix_vector_release_notifier);

    control = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];
    if ((control & (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) ==
        MSIX_ENABLE_MASK) {
        vector = 0;
        while (vector < dev->msix_entries_nr) {
            msix_unset_notifier_for_vector(dev, vector);
            vector++;
        }
    }

    dev->msix_vector_use_notifier = NULL;
    dev->msix_vector_release_notifier = NULL;
}