Statistics
| Branch: | Revision:

root / hw / msix.c @ 572992ee

History | View | Annotate | Download (15.2 kB)

1
/*
2
 * MSI-X device support
3
 *
4
 * This module includes support for MSI-X in pci devices.
5
 *
6
 * Author: Michael S. Tsirkin <mst@redhat.com>
7
 *
8
 *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
9
 *
10
 * This work is licensed under the terms of the GNU GPL, version 2.  See
11
 * the COPYING file in the top-level directory.
12
 *
13
 * Contributions after 2012-01-13 are licensed under the terms of the
14
 * GNU GPL, version 2 or (at your option) any later version.
15
 */
16

    
17
#include "hw.h"
18
#include "msi.h"
19
#include "msix.h"
20
#include "pci.h"
21
#include "range.h"
22

    
23
#define MSIX_CAP_LENGTH 12
24

    
25
/* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
26
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
27
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
28
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
29

    
30
/* Build the MSI message (address/data pair) for @vector from the
 * device's in-memory MSI-X table entry. */
static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
    uint8_t *entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;

    return (MSIMessage) {
        .address = pci_get_quad(entry + PCI_MSIX_ENTRY_LOWER_ADDR),
        .data = pci_get_long(entry + PCI_MSIX_ENTRY_DATA),
    };
}
39

    
40
/* Bit mask selecting @vector's pending bit within its PBA byte. */
static uint8_t msix_pending_mask(int vector)
{
    uint8_t mask = 1u << (vector % 8);
    return mask;
}
44

    
45
/* Pointer to the PBA byte holding @vector's pending bit. */
static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
{
    return &dev->msix_pba[vector / 8];
}
49

    
50
/* Nonzero iff @vector's pending bit is set in the PBA. */
static int msix_is_pending(PCIDevice *dev, int vector)
{
    uint8_t byte = *msix_pending_byte(dev, vector);

    return byte & msix_pending_mask(vector);
}
54

    
55
/* Record @vector as pending in the PBA. */
static void msix_set_pending(PCIDevice *dev, int vector)
{
    uint8_t *byte = msix_pending_byte(dev, vector);

    *byte |= msix_pending_mask(vector);
}
59

    
60
/* Clear @vector's pending bit in the PBA. */
static void msix_clr_pending(PCIDevice *dev, int vector)
{
    uint8_t *byte = msix_pending_byte(dev, vector);

    *byte &= ~msix_pending_mask(vector);
}
64

    
65
/* True iff @vector is masked, either by the function mask state passed
 * in @fmask or by the per-vector mask bit in its table entry. */
static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask)
{
    unsigned ctrl = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;

    if (fmask) {
        return true;
    }
    return dev->msix_table[ctrl] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
}
70

    
71
/* True iff @vector is currently masked (function mask or per-vector). */
static bool msix_is_masked(PCIDevice *dev, int vector)
{
    return msix_vector_masked(dev, vector, dev->msix_function_masked);
}
75

    
76
/* Tell the registered notifiers (if any) that @vector's mask state
 * changed: release on mask, (re)use with the current message on unmask. */
static void msix_fire_vector_notifier(PCIDevice *dev,
                                      unsigned int vector, bool is_masked)
{
    /* Both notifiers are set together; checking one suffices. */
    if (!dev->msix_vector_use_notifier) {
        return;
    }
    if (is_masked) {
        dev->msix_vector_release_notifier(dev, vector);
    } else {
        MSIMessage msg = msix_get_message(dev, vector);
        int ret = dev->msix_vector_use_notifier(dev, vector, msg);

        assert(ret >= 0);
    }
}
93

    
94
/* React to a possible change of @vector's effective mask state: notify
 * listeners, and if the vector just became unmasked with a pending
 * interrupt, deliver it now. */
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
    bool is_masked = msix_is_masked(dev, vector);

    /* No transition, nothing to do. */
    if (is_masked == was_masked) {
        return;
    }

    msix_fire_vector_notifier(dev, vector, is_masked);

    /* Deliver an interrupt that was latched while the vector was masked. */
    if (!is_masked && msix_is_pending(dev, vector)) {
        msix_clr_pending(dev, vector);
        msix_notify(dev, vector);
    }
}
109

    
110
/* Recompute the cached function-mask state from config space: the
 * function counts as masked when MSI-X is disabled or MASKALL is set. */
static void msix_update_function_masked(PCIDevice *dev)
{
    uint8_t ctrl = dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET];

    dev->msix_function_masked = !msix_enabled(dev) ||
                                (ctrl & MSIX_MASKALL_MASK);
}
115

    
116
/* Handle MSI-X capability config write. */
117
/* Handle a PCI config write that may touch the MSI-X control byte
 * (enable / mask-all bits).  @addr and @len describe a write that has
 * already been applied to dev->config; @val is unused here. */
void msix_write_config(PCIDevice *dev, uint32_t addr,
                       uint32_t val, int len)
{
    unsigned ctrl_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
    bool was_masked;
    int vector;

    /* Only writes overlapping the control byte are interesting. */
    if (!msix_present(dev) || !range_covers_byte(addr, len, ctrl_pos)) {
        return;
    }

    was_masked = dev->msix_function_masked;
    msix_update_function_masked(dev);

    if (!msix_enabled(dev)) {
        return;
    }

    /* MSI-X and INTx must not be asserted at the same time. */
    pci_device_deassert_intx(dev);

    if (dev->msix_function_masked == was_masked) {
        return;
    }

    /* The function mask flipped: propagate to every vector, using its
     * effective mask state under the old function mask as "before". */
    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        msix_handle_mask_update(dev, vector,
                                msix_vector_masked(dev, vector, was_masked));
    }
}
146

    
147
/* MMIO read from the MSI-X vector table (always 32-bit, see ops below). */
static uint64_t msix_table_mmio_read(void *opaque, target_phys_addr_t addr,
                                     unsigned size)
{
    PCIDevice *dev = opaque;

    return pci_get_long(&dev->msix_table[addr]);
}
154

    
155
/* MMIO write to the MSI-X vector table: store the value, then handle a
 * possible change of the written vector's mask bit. */
static void msix_table_mmio_write(void *opaque, target_phys_addr_t addr,
                                  uint64_t val, unsigned size)
{
    PCIDevice *dev = opaque;
    int vector = addr / PCI_MSIX_ENTRY_SIZE;
    bool was_masked = msix_is_masked(dev, vector);

    pci_set_long(&dev->msix_table[addr], val);
    msix_handle_mask_update(dev, vector, was_masked);
}
166

    
167
static const MemoryRegionOps msix_table_mmio_ops = {
168
    .read = msix_table_mmio_read,
169
    .write = msix_table_mmio_write,
170
    /* TODO: MSIX should be LITTLE_ENDIAN. */
171
    .endianness = DEVICE_NATIVE_ENDIAN,
172
    .valid = {
173
        .min_access_size = 4,
174
        .max_access_size = 4,
175
    },
176
};
177

    
178
/* MMIO read from the pending bit array (read-only; no write op exists). */
static uint64_t msix_pba_mmio_read(void *opaque, target_phys_addr_t addr,
                                   unsigned size)
{
    PCIDevice *dev = opaque;

    return pci_get_long(&dev->msix_pba[addr]);
}
185

    
186
static const MemoryRegionOps msix_pba_mmio_ops = {
187
    .read = msix_pba_mmio_read,
188
    /* TODO: MSIX should be LITTLE_ENDIAN. */
189
    .endianness = DEVICE_NATIVE_ENDIAN,
190
    .valid = {
191
        .min_access_size = 4,
192
        .max_access_size = 4,
193
    },
194
};
195

    
196
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
197
{
198
    int vector;
199

    
200
    for (vector = 0; vector < nentries; ++vector) {
201
        unsigned offset =
202
            vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
203
        bool was_masked = msix_is_masked(dev, vector);
204

    
205
        dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
206
        msix_handle_mask_update(dev, vector, was_masked);
207
    }
208
}
209

    
210
/* Initialize the MSI-X structures */
211
/* Initialize MSI-X support for @dev: add the capability, allocate the
 * vector table, PBA and per-vector usage counters, and map the
 * table/PBA MMIO regions into the given BARs at the given offsets.
 *
 * Returns 0 on success, -ENOTSUP when the interrupt controller lacks
 * MSI support, -EINVAL for a bad entry count or layout, or a negative
 * error from pci_add_capability().
 */
int msix_init(struct PCIDevice *dev, unsigned short nentries,
              MemoryRegion *table_bar, uint8_t table_bar_nr,
              unsigned table_offset, MemoryRegion *pba_bar,
              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
{
    unsigned table_size, pba_size;
    uint8_t *config;
    int cap;

    /* Nothing to do if MSI is not supported by the interrupt controller. */
    if (!msi_supported) {
        return -ENOTSUP;
    }

    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
        return -EINVAL;
    }

    table_size = nentries * PCI_MSIX_ENTRY_SIZE;
    /* The PBA is allocated in whole 64-bit (8-byte) units. */
    pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;

    /* Sanity test: table & PBA must not overlap, must fit within their
     * BARs, and must meet the minimum alignment. */
    if ((table_bar_nr == pba_bar_nr &&
         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
        table_offset + table_size > memory_region_size(table_bar) ||
        pba_offset + pba_size > memory_region_size(pba_bar) ||
        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
        return -EINVAL;
    }

    cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, cap_pos, MSIX_CAP_LENGTH);
    if (cap < 0) {
        return cap;
    }

    dev->msix_cap = cap;
    dev->cap_present |= QEMU_PCI_CAP_MSIX;
    config = dev->config + cap;

    /* The table-size field encodes N-1. */
    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
    dev->msix_entries_nr = nentries;
    dev->msix_function_masked = true;

    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);

    /* Make the enable and mask-all flag bits guest-writable. */
    dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
                                             MSIX_MASKALL_MASK;

    dev->msix_table = g_malloc0(table_size);
    dev->msix_pba = g_malloc0(pba_size);
    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);

    /* All vectors start out masked. */
    msix_mask_all(dev, nentries);

    memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
                          "msix-table", table_size);
    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
    memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
                          "msix-pba", pba_size);
    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);

    return 0;
}
276

    
277
/* Initialize MSI-X in a BAR of its own: a fixed 4K region with the
 * vector table in the lower half and the PBA in the upper half.
 *
 * Returns 0 on success or a negative errno (-EINVAL when @nentries
 * does not fit, or whatever msix_init() fails with).
 */
int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
                            uint8_t bar_nr)
{
    int ret;
    char *name;

    /*
     * Migration compatibility dictates that this remains a 4k
     * BAR with the vector table in the lower half and PBA in
     * the upper half.  Do not use these elsewhere!
     */
#define MSIX_EXCLUSIVE_BAR_SIZE 4096
#define MSIX_EXCLUSIVE_BAR_TABLE_OFFSET 0
#define MSIX_EXCLUSIVE_BAR_PBA_OFFSET (MSIX_EXCLUSIVE_BAR_SIZE / 2)
#define MSIX_EXCLUSIVE_CAP_OFFSET 0

    if (nentries * PCI_MSIX_ENTRY_SIZE > MSIX_EXCLUSIVE_BAR_PBA_OFFSET) {
        return -EINVAL;
    }

    /* g_strdup_printf() cannot fail (it aborts on OOM like the other
     * g_* allocators used in this file), unlike the GNU-only asprintf()
     * this replaces, so no -ENOMEM path is needed. */
    name = g_strdup_printf("%s-msix", dev->name);
    memory_region_init(&dev->msix_exclusive_bar, name, MSIX_EXCLUSIVE_BAR_SIZE);
    g_free(name);

    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr,
                    MSIX_EXCLUSIVE_BAR_TABLE_OFFSET, &dev->msix_exclusive_bar,
                    bar_nr, MSIX_EXCLUSIVE_BAR_PBA_OFFSET,
                    MSIX_EXCLUSIVE_CAP_OFFSET);
    if (ret) {
        memory_region_destroy(&dev->msix_exclusive_bar);
        return ret;
    }

    pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &dev->msix_exclusive_bar);

    return 0;
}
319

    
320
/* Drop all vector use counts and clear every pending bit. */
static void msix_free_irq_entries(PCIDevice *dev)
{
    int vector;

    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        dev->msix_entry_used[vector] = 0;
        msix_clr_pending(dev, vector);
    }
}
329

    
330
/* Clean up resources for the device. */
331
/* Clean up all MSI-X resources for the device: remove the capability,
 * unmap and destroy the MMIO regions, and free the table, PBA and
 * usage-count allocations.  Safe to call when MSI-X is absent. */
void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
{
    if (!msix_present(dev)) {
        return;
    }

    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
    dev->msix_cap = 0;
    msix_free_irq_entries(dev);
    dev->msix_entries_nr = 0;

    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
    memory_region_destroy(&dev->msix_pba_mmio);
    g_free(dev->msix_pba);
    dev->msix_pba = NULL;

    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
    memory_region_destroy(&dev->msix_table_mmio);
    g_free(dev->msix_table);
    dev->msix_table = NULL;

    g_free(dev->msix_entry_used);
    dev->msix_entry_used = NULL;

    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
}
353

    
354
/* Tear down MSI-X that was set up with msix_init_exclusive_bar(). */
void msix_uninit_exclusive_bar(PCIDevice *dev)
{
    if (!msix_present(dev)) {
        return;
    }
    msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
    memory_region_destroy(&dev->msix_exclusive_bar);
}
361

    
362
/* Save the MSI-X table and PBA into the migration stream. */
void msix_save(PCIDevice *dev, QEMUFile *f)
{
    unsigned n = dev->msix_entries_nr;

    if (!msix_present(dev)) {
        return;
    }

    qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
    /* NOTE(review): (n + 7) / 8 saves fewer bytes than the
     * QEMU_ALIGN_UP(n, 64) / 8 allocated in msix_init(); this is the
     * established migration format, so it must not be changed. */
    qemu_put_buffer(f, dev->msix_pba, (n + 7) / 8);
}
373

    
374
/* Should be called after restoring the config space. */
375
/* Restore the MSI-X table and PBA from the migration stream.
 * Should be called after restoring the config space. */
void msix_load(PCIDevice *dev, QEMUFile *f)
{
    unsigned n = dev->msix_entries_nr;
    unsigned int vector;

    if (!msix_present(dev)) {
        return;
    }

    msix_free_irq_entries(dev);
    qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
    qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8);
    msix_update_function_masked(dev);

    /* Treat every vector as previously masked so that unmasked vectors
     * get their notifiers fired and pending interrupts delivered. */
    for (vector = 0; vector < n; vector++) {
        msix_handle_mask_update(dev, vector, true);
    }
}
393

    
394
/* Does device support MSI-X? */
395
/* Does the device support MSI-X?  Nonzero iff the capability is set up. */
int msix_present(PCIDevice *dev)
{
    return dev->cap_present & QEMU_PCI_CAP_MSIX;
}
399

    
400
/* Is MSI-X enabled? */
401
/* Is MSI-X present AND enabled by the guest via the control register? */
int msix_enabled(PCIDevice *dev)
{
    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
           (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
            MSIX_ENABLE_MASK);
}
407

    
408
/* Send an MSI-X message */
409
/* Send an MSI-X message for @vector: ignored when out of range or
 * unused, latched in the PBA when masked, delivered otherwise. */
void msix_notify(PCIDevice *dev, unsigned vector)
{
    MSIMessage msg;

    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
        return;
    }

    if (msix_is_masked(dev, vector)) {
        msix_set_pending(dev, vector);
        return;
    }

    msg = msix_get_message(dev, vector);
    stl_le_phys(msg.address, msg.data);
}
424

    
425
/* Reset MSI-X state: drop use counts and pending bits, restore the
 * read-only config bits, wipe the table and PBA, then re-mask all
 * vectors as required after reset. */
void msix_reset(PCIDevice *dev)
{
    if (!msix_present(dev)) {
        return;
    }

    msix_free_irq_entries(dev);
    /* Clear only the guest-writable control bits. */
    dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
        ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
    memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
    memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);
    msix_mask_all(dev, dev->msix_entries_nr);
}
437

    
438
/* The PCI spec suggests that devices make it possible for software to
 * configure fewer vectors than the device supports, but it does not
 * specify a standard mechanism for devices to do so.
 *
 * We support this by asking devices to declare the vectors that software
 * is actually going to use, and checking that on the notification path.
 * Devices that do not want to follow the spec's suggestion can simply
 * declare all vectors as used. */
445

    
446
/* Mark vector as used. */
447
/* Mark @vector as used (reference-counted).
 * Returns 0 on success, -EINVAL when @vector is out of range. */
int msix_vector_use(PCIDevice *dev, unsigned vector)
{
    if (vector >= dev->msix_entries_nr) {
        return -EINVAL;
    }
    dev->msix_entry_used[vector]++;
    return 0;
}
454

    
455
/* Mark vector as unused. */
456
/* Drop one use reference on @vector; when the count reaches zero the
 * vector's pending bit is cleared as well. */
void msix_vector_unuse(PCIDevice *dev, unsigned vector)
{
    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
        return;
    }
    if (--dev->msix_entry_used[vector] == 0) {
        msix_clr_pending(dev, vector);
    }
}
466

    
467
/* Mark every vector unused and clear all pending bits. */
void msix_unuse_all_vectors(PCIDevice *dev)
{
    if (!msix_present(dev)) {
        return;
    }
    msix_free_irq_entries(dev);
}
474

    
475
unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
476
{
477
    return dev->msix_entries_nr;
478
}
479

    
480
/* Invoke the use-notifier for @vector with its current message; masked
 * vectors are skipped (they get notified on unmask instead). */
static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    MSIMessage msg;

    if (msix_is_masked(dev, vector)) {
        return 0;
    }
    msg = msix_get_message(dev, vector);
    return dev->msix_vector_use_notifier(dev, vector, msg);
}
490

    
491
/* Invoke the release-notifier for @vector unless it is masked (masked
 * vectors were never announced via the use-notifier). */
static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (msix_is_masked(dev, vector)) {
        return;
    }
    dev->msix_vector_release_notifier(dev, vector);
}
498

    
499
int msix_set_vector_notifiers(PCIDevice *dev,
500
                              MSIVectorUseNotifier use_notifier,
501
                              MSIVectorReleaseNotifier release_notifier)
502
{
503
    int vector, ret;
504

    
505
    assert(use_notifier && release_notifier);
506

    
507
    dev->msix_vector_use_notifier = use_notifier;
508
    dev->msix_vector_release_notifier = release_notifier;
509

    
510
    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
511
        (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
512
        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
513
            ret = msix_set_notifier_for_vector(dev, vector);
514
            if (ret < 0) {
515
                goto undo;
516
            }
517
        }
518
    }
519
    return 0;
520

    
521
undo:
522
    while (--vector >= 0) {
523
        msix_unset_notifier_for_vector(dev, vector);
524
    }
525
    dev->msix_vector_use_notifier = NULL;
526
    dev->msix_vector_release_notifier = NULL;
527
    return ret;
528
}
529

    
530
/* Release every announced vector (when enabled without MASKALL) and
 * unregister the use/release notifiers. */
void msix_unset_vector_notifiers(PCIDevice *dev)
{
    int vector;

    assert(dev->msix_vector_use_notifier &&
           dev->msix_vector_release_notifier);

    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
        (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
            msix_unset_notifier_for_vector(dev, vector);
        }
    }
    dev->msix_vector_use_notifier = NULL;
    dev->msix_vector_release_notifier = NULL;
}