1
/*
2
 * QEMU KVM support
3
 *
4
 * Copyright IBM, Corp. 2008
5
 *           Red Hat, Inc. 2008
6
 *
7
 * Authors:
8
 *  Anthony Liguori   <aliguori@us.ibm.com>
9
 *  Glauber Costa     <gcosta@redhat.com>
10
 *
11
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12
 * See the COPYING file in the top-level directory.
13
 *
14
 */
15

    
16
#include <sys/types.h>
17
#include <sys/ioctl.h>
18
#include <sys/mman.h>
19
#include <stdarg.h>
20

    
21
#include <linux/kvm.h>
22

    
23
#include "qemu-common.h"
24
#include "qemu/atomic.h"
25
#include "qemu/option.h"
26
#include "qemu/config-file.h"
27
#include "sysemu/sysemu.h"
28
#include "hw/hw.h"
29
#include "hw/pci/msi.h"
30
#include "exec/gdbstub.h"
31
#include "sysemu/kvm.h"
32
#include "qemu/bswap.h"
33
#include "exec/memory.h"
34
#include "exec/ram_addr.h"
35
#include "exec/address-spaces.h"
36
#include "qemu/event_notifier.h"
37
#include "trace.h"
38

    
39
/* This check must be after config-host.h is included */
40
#ifdef CONFIG_EVENTFD
41
#include <sys/eventfd.h>
42
#endif
43

    
44
#ifdef CONFIG_VALGRIND_H
45
#include <valgrind/memcheck.h>
46
#endif
47

    
48
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
49
#define PAGE_SIZE TARGET_PAGE_SIZE
50

    
51
//#define DEBUG_KVM
52

    
53
#ifdef DEBUG_KVM
54
#define DPRINTF(fmt, ...) \
55
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
56
#else
57
#define DPRINTF(fmt, ...) \
58
    do { } while (0)
59
#endif
60

    
61
#define KVM_MSI_HASHTAB_SIZE    256
62

    
63
typedef struct KVMSlot
64
{
65
    hwaddr start_addr;
66
    ram_addr_t memory_size;
67
    void *ram;
68
    int slot;
69
    int flags;
70
} KVMSlot;
71

    
72
typedef struct kvm_dirty_log KVMDirtyLog;
73

    
74
struct KVMState
75
{
76
    KVMSlot *slots;
77
    int nr_slots;
78
    int fd;
79
    int vmfd;
80
    int coalesced_mmio;
81
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
82
    bool coalesced_flush_in_progress;
83
    int broken_set_mem_region;
84
    int migration_log;
85
    int vcpu_events;
86
    int robust_singlestep;
87
    int debugregs;
88
#ifdef KVM_CAP_SET_GUEST_DEBUG
89
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
90
#endif
91
    int pit_state2;
92
    int xsave, xcrs;
93
    int many_ioeventfds;
94
    int intx_set_mask;
95
    /* The man page (and posix) say ioctl numbers are signed int, but
96
     * they're not.  Linux, glibc and *BSD all treat ioctl numbers as
97
     * unsigned, and treating them as signed here can break things */
98
    unsigned irq_set_ioctl;
99
#ifdef KVM_CAP_IRQ_ROUTING
100
    struct kvm_irq_routing *irq_routes;
101
    int nr_allocated_irq_routes;
102
    uint32_t *used_gsi_bitmap;
103
    unsigned int gsi_count;
104
    QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
105
    bool direct_msi;
106
#endif
107
};
108

    
109
KVMState *kvm_state;
110
bool kvm_kernel_irqchip;
111
bool kvm_async_interrupts_allowed;
112
bool kvm_halt_in_kernel_allowed;
113
bool kvm_irqfds_allowed;
114
bool kvm_msi_via_irqfd_allowed;
115
bool kvm_gsi_routing_allowed;
116
bool kvm_gsi_direct_mapping;
117
bool kvm_allowed;
118
bool kvm_readonly_mem_allowed;
119

    
120
static const KVMCapabilityInfo kvm_required_capabilites[] = {
121
    KVM_CAP_INFO(USER_MEMORY),
122
    KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
123
    KVM_CAP_LAST_INFO
124
};
125

    
126
static KVMSlot *kvm_alloc_slot(KVMState *s)
127
{
128
    int i;
129

    
130
    for (i = 0; i < s->nr_slots; i++) {
131
        if (s->slots[i].memory_size == 0) {
132
            return &s->slots[i];
133
        }
134
    }
135

    
136
    fprintf(stderr, "%s: no free slot available\n", __func__);
137
    abort();
138
}
139

    
140
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
141
                                         hwaddr start_addr,
142
                                         hwaddr end_addr)
143
{
144
    int i;
145

    
146
    for (i = 0; i < s->nr_slots; i++) {
147
        KVMSlot *mem = &s->slots[i];
148

    
149
        if (start_addr == mem->start_addr &&
150
            end_addr == mem->start_addr + mem->memory_size) {
151
            return mem;
152
        }
153
    }
154

    
155
    return NULL;
156
}
157

    
158
/*
159
 * Find overlapping slot with lowest start address
160
 */
161
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
162
                                            hwaddr start_addr,
163
                                            hwaddr end_addr)
164
{
165
    KVMSlot *found = NULL;
166
    int i;
167

    
168
    for (i = 0; i < s->nr_slots; i++) {
169
        KVMSlot *mem = &s->slots[i];
170

    
171
        if (mem->memory_size == 0 ||
172
            (found && found->start_addr < mem->start_addr)) {
173
            continue;
174
        }
175

    
176
        if (end_addr > mem->start_addr &&
177
            start_addr < mem->start_addr + mem->memory_size) {
178
            found = mem;
179
        }
180
    }
181

    
182
    return found;
183
}
184

    
185
int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
186
                                       hwaddr *phys_addr)
187
{
188
    int i;
189

    
190
    for (i = 0; i < s->nr_slots; i++) {
191
        KVMSlot *mem = &s->slots[i];
192

    
193
        if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
194
            *phys_addr = mem->start_addr + (ram - mem->ram);
195
            return 1;
196
        }
197
    }
198

    
199
    return 0;
200
}
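
/* Usage sketch (illustrative only; host_ptr is a hypothetical pointer into
 * guest RAM):
 *
 *     hwaddr gpa;
 *     if (kvm_physical_memory_addr_from_host(kvm_state, host_ptr, &gpa)) {
 *         // host_ptr is backed by guest RAM at guest physical address gpa
 *     }
 *
 * A return value of 0 means the pointer is not covered by any KVM slot.
 */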
201

    
202
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
203
{
204
    struct kvm_userspace_memory_region mem;
205

    
206
    mem.slot = slot->slot;
207
    mem.guest_phys_addr = slot->start_addr;
208
    mem.userspace_addr = (unsigned long)slot->ram;
209
    mem.flags = slot->flags;
210
    if (s->migration_log) {
211
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
212
    }
213

    
214
    if (slot->memory_size && mem.flags & KVM_MEM_READONLY) {
215
        /* Set the slot size to 0 before setting the slot to the desired
216
         * value. This is needed based on KVM commit 75d61fbc. */
217
        mem.memory_size = 0;
218
        kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
219
    }
220
    mem.memory_size = slot->memory_size;
221
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
222
}
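
/* Note: struct kvm_userspace_memory_region ties the guest-physical range
 * (guest_phys_addr, memory_size) to a host virtual address (userspace_addr).
 * A memory_size of 0 deletes the slot, which is how the read-only path above
 * clears the old slot before re-creating it with the desired flags. */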
223

    
224
static void kvm_reset_vcpu(void *opaque)
225
{
226
    CPUState *cpu = opaque;
227

    
228
    kvm_arch_reset_vcpu(cpu);
229
}
230

    
231
int kvm_init_vcpu(CPUState *cpu)
232
{
233
    KVMState *s = kvm_state;
234
    long mmap_size;
235
    int ret;
236

    
237
    DPRINTF("kvm_init_vcpu\n");
238

    
239
    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu));
240
    if (ret < 0) {
241
        DPRINTF("kvm_create_vcpu failed\n");
242
        goto err;
243
    }
244

    
245
    cpu->kvm_fd = ret;
246
    cpu->kvm_state = s;
247
    cpu->kvm_vcpu_dirty = true;
248

    
249
    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
250
    if (mmap_size < 0) {
251
        ret = mmap_size;
252
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
253
        goto err;
254
    }
255

    
256
    cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
257
                        cpu->kvm_fd, 0);
258
    if (cpu->kvm_run == MAP_FAILED) {
259
        ret = -errno;
260
        DPRINTF("mmap'ing vcpu state failed\n");
261
        goto err;
262
    }
263

    
264
    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
265
        s->coalesced_mmio_ring =
266
            (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
267
    }
268

    
269
    ret = kvm_arch_init_vcpu(cpu);
270
    if (ret == 0) {
271
        qemu_register_reset(kvm_reset_vcpu, cpu);
272
        kvm_arch_reset_vcpu(cpu);
273
    }
274
err:
275
    return ret;
276
}
277

    
278
/*
279
 * dirty pages logging control
280
 */
281

    
282
static int kvm_mem_flags(KVMState *s, bool log_dirty, bool readonly)
283
{
284
    int flags = 0;
285
    flags = log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0;
286
    if (readonly && kvm_readonly_mem_allowed) {
287
        flags |= KVM_MEM_READONLY;
288
    }
289
    return flags;
290
}
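
/* For example, a read-only region with dirty logging enabled would get
 * KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_READONLY here (the latter only when the
 * host advertises KVM_CAP_READONLY_MEM, see kvm_readonly_mem_allowed). */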
291

    
292
static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
293
{
294
    KVMState *s = kvm_state;
295
    int flags, mask = KVM_MEM_LOG_DIRTY_PAGES;
296
    int old_flags;
297

    
298
    old_flags = mem->flags;
299

    
300
    flags = (mem->flags & ~mask) | kvm_mem_flags(s, log_dirty, false);
301
    mem->flags = flags;
302

    
303
    /* If nothing changed effectively, no need to issue ioctl */
304
    if (s->migration_log) {
305
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
306
    }
307

    
308
    if (flags == old_flags) {
309
        return 0;
310
    }
311

    
312
    return kvm_set_user_memory_region(s, mem);
313
}
314

    
315
static int kvm_dirty_pages_log_change(hwaddr phys_addr,
316
                                      ram_addr_t size, bool log_dirty)
317
{
318
    KVMState *s = kvm_state;
319
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
320

    
321
    if (mem == NULL)  {
322
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
323
                TARGET_FMT_plx "\n", __func__, phys_addr,
324
                (hwaddr)(phys_addr + size - 1));
325
        return -EINVAL;
326
    }
327
    return kvm_slot_dirty_pages_log_change(mem, log_dirty);
328
}
329

    
330
static void kvm_log_start(MemoryListener *listener,
331
                          MemoryRegionSection *section)
332
{
333
    int r;
334

    
335
    r = kvm_dirty_pages_log_change(section->offset_within_address_space,
336
                                   int128_get64(section->size), true);
337
    if (r < 0) {
338
        abort();
339
    }
340
}
341

    
342
static void kvm_log_stop(MemoryListener *listener,
343
                          MemoryRegionSection *section)
344
{
345
    int r;
346

    
347
    r = kvm_dirty_pages_log_change(section->offset_within_address_space,
348
                                   int128_get64(section->size), false);
349
    if (r < 0) {
350
        abort();
351
    }
352
}
353

    
354
static int kvm_set_migration_log(int enable)
355
{
356
    KVMState *s = kvm_state;
357
    KVMSlot *mem;
358
    int i, err;
359

    
360
    s->migration_log = enable;
361

    
362
    for (i = 0; i < s->nr_slots; i++) {
363
        mem = &s->slots[i];
364

    
365
        if (!mem->memory_size) {
366
            continue;
367
        }
368
        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
369
            continue;
370
        }
371
        err = kvm_set_user_memory_region(s, mem);
372
        if (err) {
373
            return err;
374
        }
375
    }
376
    return 0;
377
}
378

    
379
/* get kvm's dirty pages bitmap and update qemu's */
380
static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
381
                                         unsigned long *bitmap)
382
{
383
    ram_addr_t start = section->offset_within_region + section->mr->ram_addr;
384
    ram_addr_t pages = int128_get64(section->size) / getpagesize();
385

    
386
    cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages);
387
    return 0;
388
}
389

    
390
#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
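
/* ALIGN() above rounds x up to the next multiple of y; y must be a power of
 * two for the mask to work, e.g. ALIGN(5, 4) == 8 and ALIGN(64, 64) == 64. */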
391

    
392
/**
 * kvm_physical_sync_dirty_bitmap - Grab the dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using
 * cpu_physical_memory_set_dirty_lebitmap(), i.e. every page that KVM
 * reports as dirty is marked dirty on the QEMU side.
 *
 * @section: the memory region section whose dirty log is synchronized
 */
401
static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
402
{
403
    KVMState *s = kvm_state;
404
    unsigned long size, allocated_size = 0;
405
    KVMDirtyLog d;
406
    KVMSlot *mem;
407
    int ret = 0;
408
    hwaddr start_addr = section->offset_within_address_space;
409
    hwaddr end_addr = start_addr + int128_get64(section->size);
410

    
411
    d.dirty_bitmap = NULL;
412
    while (start_addr < end_addr) {
413
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
414
        if (mem == NULL) {
415
            break;
416
        }
417

    
418
        /* XXX bad kernel interface alert
         * For the dirty bitmap, the kernel allocates an array of size
         * aligned to bits-per-long.  But when the kernel is 64-bit and
         * userspace is 32-bit, userspace can't align to the same
         * bits-per-long, since sizeof(long) differs between kernel and
         * user space.  As a result, userspace may provide a buffer that
         * is 4 bytes smaller than the one the kernel uses, leading to
         * userspace memory corruption (which in most cases is not
         * detectable by valgrind either).
         * So for now, let's align to 64 instead of HOST_LONG_BITS here,
         * in the hope that sizeof(long) won't become >8 any time soon.
         */
430
        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
431
                     /*HOST_LONG_BITS*/ 64) / 8;
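        /* For illustration: with 4 KiB target pages, a 1 GiB slot spans
         * 262144 pages, so size = ALIGN(262144, 64) / 8 = 32768 bytes for
         * the bitmap buffer allocated below. */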
432
        if (!d.dirty_bitmap) {
433
            d.dirty_bitmap = g_malloc(size);
434
        } else if (size > allocated_size) {
435
            d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
436
        }
437
        allocated_size = size;
438
        memset(d.dirty_bitmap, 0, allocated_size);
439

    
440
        d.slot = mem->slot;
441

    
442
        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
443
            DPRINTF("ioctl failed %d\n", errno);
444
            ret = -1;
445
            break;
446
        }
447

    
448
        kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
449
        start_addr = mem->start_addr + mem->memory_size;
450
    }
451
    g_free(d.dirty_bitmap);
452

    
453
    return ret;
454
}
455

    
456
static void kvm_coalesce_mmio_region(MemoryListener *listener,
457
                                     MemoryRegionSection *section,
458
                                     hwaddr start, hwaddr size)
459
{
460
    KVMState *s = kvm_state;
461

    
462
    if (s->coalesced_mmio) {
463
        struct kvm_coalesced_mmio_zone zone;
464

    
465
        zone.addr = start;
466
        zone.size = size;
467
        zone.pad = 0;
468

    
469
        (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
470
    }
471
}
472

    
473
static void kvm_uncoalesce_mmio_region(MemoryListener *listener,
474
                                       MemoryRegionSection *section,
475
                                       hwaddr start, hwaddr size)
476
{
477
    KVMState *s = kvm_state;
478

    
479
    if (s->coalesced_mmio) {
480
        struct kvm_coalesced_mmio_zone zone;
481

    
482
        zone.addr = start;
483
        zone.size = size;
484
        zone.pad = 0;
485

    
486
        (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
487
    }
488
}
489

    
490
int kvm_check_extension(KVMState *s, unsigned int extension)
491
{
492
    int ret;
493

    
494
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
495
    if (ret < 0) {
496
        ret = 0;
497
    }
498

    
499
    return ret;
500
}
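
/* Example (illustrative): callers typically probe a capability before
 * relying on it, e.g.
 *
 *     if (kvm_check_extension(s, KVM_CAP_IOEVENTFD)) {
 *         // ioeventfd is available on this host
 *     }
 *
 * Errors from the ioctl are folded into 0, so the result is always either
 * "unsupported" (0) or the non-negative capability value.
 */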
501

    
502
static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val,
503
                                  bool assign, uint32_t size, bool datamatch)
504
{
505
    int ret;
506
    struct kvm_ioeventfd iofd;
507

    
508
    iofd.datamatch = datamatch ? val : 0;
509
    iofd.addr = addr;
510
    iofd.len = size;
511
    iofd.flags = 0;
512
    iofd.fd = fd;
513

    
514
    if (!kvm_enabled()) {
515
        return -ENOSYS;
516
    }
517

    
518
    if (datamatch) {
519
        iofd.flags |= KVM_IOEVENTFD_FLAG_DATAMATCH;
520
    }
521
    if (!assign) {
522
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
523
    }
524

    
525
    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);
526

    
527
    if (ret < 0) {
528
        return -errno;
529
    }
530

    
531
    return 0;
532
}
533

    
534
static int kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint16_t val,
535
                                 bool assign, uint32_t size, bool datamatch)
536
{
537
    struct kvm_ioeventfd kick = {
538
        .datamatch = datamatch ? val : 0,
539
        .addr = addr,
540
        .flags = KVM_IOEVENTFD_FLAG_PIO,
541
        .len = size,
542
        .fd = fd,
543
    };
544
    int r;
545
    if (!kvm_enabled()) {
546
        return -ENOSYS;
547
    }
548
    if (datamatch) {
549
        kick.flags |= KVM_IOEVENTFD_FLAG_DATAMATCH;
550
    }
551
    if (!assign) {
552
        kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
553
    }
554
    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
555
    if (r < 0) {
556
        return r;
557
    }
558
    return 0;
559
}
560

    
561

    
562
static int kvm_check_many_ioeventfds(void)
563
{
564
    /* Userspace can use ioeventfd for io notification.  This requires a host
565
     * that supports eventfd(2) and an I/O thread; since eventfd does not
566
     * support SIGIO it cannot interrupt the vcpu.
567
     *
568
     * Older kernels have a 6 device limit on the KVM io bus.  Find out so we
569
     * can avoid creating too many ioeventfds.
570
     */
571
#if defined(CONFIG_EVENTFD)
572
    int ioeventfds[7];
573
    int i, ret = 0;
574
    for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) {
575
        ioeventfds[i] = eventfd(0, EFD_CLOEXEC);
576
        if (ioeventfds[i] < 0) {
577
            break;
578
        }
579
        ret = kvm_set_ioeventfd_pio(ioeventfds[i], 0, i, true, 2, true);
580
        if (ret < 0) {
581
            close(ioeventfds[i]);
582
            break;
583
        }
584
    }
585

    
586
    /* Decide whether many devices are supported or not */
587
    ret = i == ARRAY_SIZE(ioeventfds);
588

    
589
    while (i-- > 0) {
590
        kvm_set_ioeventfd_pio(ioeventfds[i], 0, i, false, 2, true);
591
        close(ioeventfds[i]);
592
    }
593
    return ret;
594
#else
595
    return 0;
596
#endif
597
}
598

    
599
static const KVMCapabilityInfo *
600
kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
601
{
602
    while (list->name) {
603
        if (!kvm_check_extension(s, list->value)) {
604
            return list;
605
        }
606
        list++;
607
    }
608
    return NULL;
609
}
610

    
611
static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
612
{
613
    KVMState *s = kvm_state;
614
    KVMSlot *mem, old;
615
    int err;
616
    MemoryRegion *mr = section->mr;
617
    bool log_dirty = memory_region_is_logging(mr);
618
    bool writeable = !mr->readonly && !mr->rom_device;
619
    bool readonly_flag = mr->readonly || memory_region_is_romd(mr);
620
    hwaddr start_addr = section->offset_within_address_space;
621
    ram_addr_t size = int128_get64(section->size);
622
    void *ram = NULL;
623
    unsigned delta;
624

    
625
    /* kvm works in page size chunks, but the function may be called
626
       with sub-page size and unaligned start address. */
627
    delta = TARGET_PAGE_ALIGN(size) - size;
628
    if (delta > size) {
629
        return;
630
    }
631
    start_addr += delta;
632
    size -= delta;
633
    size &= TARGET_PAGE_MASK;
634
    if (!size || (start_addr & ~TARGET_PAGE_MASK)) {
635
        return;
636
    }
637

    
638
    if (!memory_region_is_ram(mr)) {
639
        if (writeable || !kvm_readonly_mem_allowed) {
640
            return;
641
        } else if (!mr->romd_mode) {
642
            /* If the memory device is not in romd_mode, then we actually want
643
             * to remove the kvm memory slot so all accesses will trap. */
644
            add = false;
645
        }
646
    }
647

    
648
    ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;
649

    
650
    while (1) {
651
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
652
        if (!mem) {
653
            break;
654
        }
655

    
656
        if (add && start_addr >= mem->start_addr &&
657
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
658
            (ram - start_addr == mem->ram - mem->start_addr)) {
659
            /* The new slot fits into the existing one and comes with
660
             * identical parameters - update flags and done. */
661
            kvm_slot_dirty_pages_log_change(mem, log_dirty);
662
            return;
663
        }
664

    
665
        old = *mem;
666

    
667
        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
668
            kvm_physical_sync_dirty_bitmap(section);
669
        }
670

    
671
        /* unregister the overlapping slot */
672
        mem->memory_size = 0;
673
        err = kvm_set_user_memory_region(s, mem);
674
        if (err) {
675
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
676
                    __func__, strerror(-err));
677
            abort();
678
        }
679

    
680
        /* Workaround for older KVM versions: we can't join slots, not even by
681
         * unregistering the previous ones and then registering the larger
682
         * slot. We have to maintain the existing fragmentation. Sigh.
683
         *
684
         * This workaround assumes that the new slot starts at the same
685
         * address as the first existing one. If not or if some overlapping
686
         * slot comes around later, we will fail (not seen in practice so far)
687
         * - and actually require a recent KVM version. */
688
        if (s->broken_set_mem_region &&
689
            old.start_addr == start_addr && old.memory_size < size && add) {
690
            mem = kvm_alloc_slot(s);
691
            mem->memory_size = old.memory_size;
692
            mem->start_addr = old.start_addr;
693
            mem->ram = old.ram;
694
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);
695

    
696
            err = kvm_set_user_memory_region(s, mem);
697
            if (err) {
698
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
699
                        strerror(-err));
700
                abort();
701
            }
702

    
703
            start_addr += old.memory_size;
704
            ram += old.memory_size;
705
            size -= old.memory_size;
706
            continue;
707
        }
708

    
709
        /* register prefix slot */
710
        if (old.start_addr < start_addr) {
711
            mem = kvm_alloc_slot(s);
712
            mem->memory_size = start_addr - old.start_addr;
713
            mem->start_addr = old.start_addr;
714
            mem->ram = old.ram;
715
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);
716

    
717
            err = kvm_set_user_memory_region(s, mem);
718
            if (err) {
719
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
720
                        __func__, strerror(-err));
721
#ifdef TARGET_PPC
722
                fprintf(stderr, "%s: This is probably because your kernel's " \
723
                                "PAGE_SIZE is too big. Please try to use 4k " \
724
                                "PAGE_SIZE!\n", __func__);
725
#endif
726
                abort();
727
            }
728
        }
729

    
730
        /* register suffix slot */
731
        if (old.start_addr + old.memory_size > start_addr + size) {
732
            ram_addr_t size_delta;
733

    
734
            mem = kvm_alloc_slot(s);
735
            mem->start_addr = start_addr + size;
736
            size_delta = mem->start_addr - old.start_addr;
737
            mem->memory_size = old.memory_size - size_delta;
738
            mem->ram = old.ram + size_delta;
739
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);
740

    
741
            err = kvm_set_user_memory_region(s, mem);
742
            if (err) {
743
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
744
                        __func__, strerror(-err));
745
                abort();
746
            }
747
        }
748
    }
749

    
750
    /* in case the KVM bug workaround already "consumed" the new slot */
751
    if (!size) {
752
        return;
753
    }
754
    if (!add) {
755
        return;
756
    }
757
    mem = kvm_alloc_slot(s);
758
    mem->memory_size = size;
759
    mem->start_addr = start_addr;
760
    mem->ram = ram;
761
    mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);
762

    
763
    err = kvm_set_user_memory_region(s, mem);
764
    if (err) {
765
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
766
                strerror(-err));
767
        abort();
768
    }
769
}
770

    
771
static void kvm_region_add(MemoryListener *listener,
772
                           MemoryRegionSection *section)
773
{
774
    memory_region_ref(section->mr);
775
    kvm_set_phys_mem(section, true);
776
}
777

    
778
static void kvm_region_del(MemoryListener *listener,
779
                           MemoryRegionSection *section)
780
{
781
    kvm_set_phys_mem(section, false);
782
    memory_region_unref(section->mr);
783
}
784

    
785
static void kvm_log_sync(MemoryListener *listener,
786
                         MemoryRegionSection *section)
787
{
788
    int r;
789

    
790
    r = kvm_physical_sync_dirty_bitmap(section);
791
    if (r < 0) {
792
        abort();
793
    }
794
}
795

    
796
static void kvm_log_global_start(struct MemoryListener *listener)
797
{
798
    int r;
799

    
800
    r = kvm_set_migration_log(1);
801
    assert(r >= 0);
802
}
803

    
804
static void kvm_log_global_stop(struct MemoryListener *listener)
805
{
806
    int r;
807

    
808
    r = kvm_set_migration_log(0);
809
    assert(r >= 0);
810
}
811

    
812
static void kvm_mem_ioeventfd_add(MemoryListener *listener,
813
                                  MemoryRegionSection *section,
814
                                  bool match_data, uint64_t data,
815
                                  EventNotifier *e)
816
{
817
    int fd = event_notifier_get_fd(e);
818
    int r;
819

    
820
    r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
821
                               data, true, int128_get64(section->size),
822
                               match_data);
823
    if (r < 0) {
824
        fprintf(stderr, "%s: error adding ioeventfd: %s\n",
825
                __func__, strerror(-r));
826
        abort();
827
    }
828
}
829

    
830
static void kvm_mem_ioeventfd_del(MemoryListener *listener,
831
                                  MemoryRegionSection *section,
832
                                  bool match_data, uint64_t data,
833
                                  EventNotifier *e)
834
{
835
    int fd = event_notifier_get_fd(e);
836
    int r;
837

    
838
    r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
839
                               data, false, int128_get64(section->size),
840
                               match_data);
841
    if (r < 0) {
842
        abort();
843
    }
844
}
845

    
846
static void kvm_io_ioeventfd_add(MemoryListener *listener,
847
                                 MemoryRegionSection *section,
848
                                 bool match_data, uint64_t data,
849
                                 EventNotifier *e)
850
{
851
    int fd = event_notifier_get_fd(e);
852
    int r;
853

    
854
    r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
855
                              data, true, int128_get64(section->size),
856
                              match_data);
857
    if (r < 0) {
858
        fprintf(stderr, "%s: error adding ioeventfd: %s\n",
859
                __func__, strerror(-r));
860
        abort();
861
    }
862
}
863

    
864
static void kvm_io_ioeventfd_del(MemoryListener *listener,
865
                                 MemoryRegionSection *section,
866
                                 bool match_data, uint64_t data,
867
                                 EventNotifier *e)
868

    
869
{
870
    int fd = event_notifier_get_fd(e);
871
    int r;
872

    
873
    r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
874
                              data, false, int128_get64(section->size),
875
                              match_data);
876
    if (r < 0) {
877
        abort();
878
    }
879
}
880

    
881
static MemoryListener kvm_memory_listener = {
882
    .region_add = kvm_region_add,
883
    .region_del = kvm_region_del,
884
    .log_start = kvm_log_start,
885
    .log_stop = kvm_log_stop,
886
    .log_sync = kvm_log_sync,
887
    .log_global_start = kvm_log_global_start,
888
    .log_global_stop = kvm_log_global_stop,
889
    .eventfd_add = kvm_mem_ioeventfd_add,
890
    .eventfd_del = kvm_mem_ioeventfd_del,
891
    .coalesced_mmio_add = kvm_coalesce_mmio_region,
892
    .coalesced_mmio_del = kvm_uncoalesce_mmio_region,
893
    .priority = 10,
894
};
895

    
896
static MemoryListener kvm_io_listener = {
897
    .eventfd_add = kvm_io_ioeventfd_add,
898
    .eventfd_del = kvm_io_ioeventfd_del,
899
    .priority = 10,
900
};
901

    
902
static void kvm_handle_interrupt(CPUState *cpu, int mask)
903
{
904
    cpu->interrupt_request |= mask;
905

    
906
    if (!qemu_cpu_is_self(cpu)) {
907
        qemu_cpu_kick(cpu);
908
    }
909
}
910

    
911
int kvm_set_irq(KVMState *s, int irq, int level)
912
{
913
    struct kvm_irq_level event;
914
    int ret;
915

    
916
    assert(kvm_async_interrupts_enabled());
917

    
918
    event.level = level;
919
    event.irq = irq;
920
    ret = kvm_vm_ioctl(s, s->irq_set_ioctl, &event);
921
    if (ret < 0) {
922
        perror("kvm_set_irq");
923
        abort();
924
    }
925

    
926
    return (s->irq_set_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
927
}
928

    
929
#ifdef KVM_CAP_IRQ_ROUTING
930
typedef struct KVMMSIRoute {
931
    struct kvm_irq_routing_entry kroute;
932
    QTAILQ_ENTRY(KVMMSIRoute) entry;
933
} KVMMSIRoute;
934

    
935
static void set_gsi(KVMState *s, unsigned int gsi)
936
{
937
    s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32);
938
}
939

    
940
static void clear_gsi(KVMState *s, unsigned int gsi)
941
{
942
    s->used_gsi_bitmap[gsi / 32] &= ~(1U << (gsi % 32));
943
}
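
/* The used_gsi_bitmap is an array of 32-bit words: GSI n lives at bit
 * n % 32 of word n / 32, so e.g. GSI 37 is bit 5 of word 1. */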
944

    
945
void kvm_init_irq_routing(KVMState *s)
946
{
947
    int gsi_count, i;
948

    
949
    gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING);
950
    if (gsi_count > 0) {
951
        unsigned int gsi_bits, i;
952

    
953
        /* Round up so we can search ints using ffs */
954
        gsi_bits = ALIGN(gsi_count, 32);
955
        s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
956
        s->gsi_count = gsi_count;
957

    
958
        /* Mark any over-allocated bits as already in use */
959
        for (i = gsi_count; i < gsi_bits; i++) {
960
            set_gsi(s, i);
961
        }
962
    }
963

    
964
    s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
965
    s->nr_allocated_irq_routes = 0;
966

    
967
    if (!s->direct_msi) {
968
        for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) {
969
            QTAILQ_INIT(&s->msi_hashtab[i]);
970
        }
971
    }
972

    
973
    kvm_arch_init_irq_routing(s);
974
}
975

    
976
void kvm_irqchip_commit_routes(KVMState *s)
977
{
978
    int ret;
979

    
980
    s->irq_routes->flags = 0;
981
    ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
982
    assert(ret == 0);
983
}
984

    
985
static void kvm_add_routing_entry(KVMState *s,
986
                                  struct kvm_irq_routing_entry *entry)
987
{
988
    struct kvm_irq_routing_entry *new;
989
    int n, size;
990

    
991
    if (s->irq_routes->nr == s->nr_allocated_irq_routes) {
992
        n = s->nr_allocated_irq_routes * 2;
993
        if (n < 64) {
994
            n = 64;
995
        }
996
        size = sizeof(struct kvm_irq_routing);
997
        size += n * sizeof(*new);
998
        s->irq_routes = g_realloc(s->irq_routes, size);
999
        s->nr_allocated_irq_routes = n;
1000
    }
1001
    n = s->irq_routes->nr++;
1002
    new = &s->irq_routes->entries[n];
1003

    
1004
    *new = *entry;
1005

    
1006
    set_gsi(s, entry->gsi);
1007
}
1008

    
1009
static int kvm_update_routing_entry(KVMState *s,
1010
                                    struct kvm_irq_routing_entry *new_entry)
1011
{
1012
    struct kvm_irq_routing_entry *entry;
1013
    int n;
1014

    
1015
    for (n = 0; n < s->irq_routes->nr; n++) {
1016
        entry = &s->irq_routes->entries[n];
1017
        if (entry->gsi != new_entry->gsi) {
1018
            continue;
1019
        }
1020

    
1021
        if (!memcmp(entry, new_entry, sizeof *entry)) {
1022
            return 0;
1023
        }
1024

    
1025
        *entry = *new_entry;
1026

    
1027
        kvm_irqchip_commit_routes(s);
1028

    
1029
        return 0;
1030
    }
1031

    
1032
    return -ESRCH;
1033
}
1034

    
1035
void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin)
1036
{
1037
    struct kvm_irq_routing_entry e = {};
1038

    
1039
    assert(pin < s->gsi_count);
1040

    
1041
    e.gsi = irq;
1042
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
1043
    e.flags = 0;
1044
    e.u.irqchip.irqchip = irqchip;
1045
    e.u.irqchip.pin = pin;
1046
    kvm_add_routing_entry(s, &e);
1047
}
1048

    
1049
void kvm_irqchip_release_virq(KVMState *s, int virq)
1050
{
1051
    struct kvm_irq_routing_entry *e;
1052
    int i;
1053

    
1054
    if (kvm_gsi_direct_mapping()) {
1055
        return;
1056
    }
1057

    
1058
    for (i = 0; i < s->irq_routes->nr; i++) {
1059
        e = &s->irq_routes->entries[i];
1060
        if (e->gsi == virq) {
1061
            s->irq_routes->nr--;
1062
            *e = s->irq_routes->entries[s->irq_routes->nr];
1063
        }
1064
    }
1065
    clear_gsi(s, virq);
1066
}
1067

    
1068
static unsigned int kvm_hash_msi(uint32_t data)
1069
{
1070
    /* This is optimized for IA32 MSI layout. However, no other arch shall
1071
     * repeat the mistake of not providing a direct MSI injection API. */
1072
    return data & 0xff;
1073
}
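
/* On x86 the low byte of the MSI data register carries the interrupt
 * vector, so hashing on data & 0xff spreads routes across all
 * KVM_MSI_HASHTAB_SIZE (256) buckets. */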
1074

    
1075
static void kvm_flush_dynamic_msi_routes(KVMState *s)
1076
{
1077
    KVMMSIRoute *route, *next;
1078
    unsigned int hash;
1079

    
1080
    for (hash = 0; hash < KVM_MSI_HASHTAB_SIZE; hash++) {
1081
        QTAILQ_FOREACH_SAFE(route, &s->msi_hashtab[hash], entry, next) {
1082
            kvm_irqchip_release_virq(s, route->kroute.gsi);
1083
            QTAILQ_REMOVE(&s->msi_hashtab[hash], route, entry);
1084
            g_free(route);
1085
        }
1086
    }
1087
}
1088

    
1089
static int kvm_irqchip_get_virq(KVMState *s)
1090
{
1091
    uint32_t *word = s->used_gsi_bitmap;
1092
    int max_words = ALIGN(s->gsi_count, 32) / 32;
1093
    int i, bit;
1094
    bool retry = true;
1095

    
1096
again:
1097
    /* Return the lowest unused GSI in the bitmap */
1098
    for (i = 0; i < max_words; i++) {
1099
        bit = ffs(~word[i]);
1100
        if (!bit) {
1101
            continue;
1102
        }
1103

    
1104
        return bit - 1 + i * 32;
1105
    }
1106
    if (!s->direct_msi && retry) {
1107
        retry = false;
1108
        kvm_flush_dynamic_msi_routes(s);
1109
        goto again;
1110
    }
1111
    return -ENOSPC;
1112

    
1113
}
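
/* Example (illustrative): with gsi_count = 24, kvm_init_irq_routing()
 * pre-sets bits 24-31 of word 0, so ffs(~word[0]) yields the lowest free
 * GSI plus one, and 0 once all 24 routable GSIs are taken. */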
1114

    
1115
static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
1116
{
1117
    unsigned int hash = kvm_hash_msi(msg.data);
1118
    KVMMSIRoute *route;
1119

    
1120
    QTAILQ_FOREACH(route, &s->msi_hashtab[hash], entry) {
1121
        if (route->kroute.u.msi.address_lo == (uint32_t)msg.address &&
1122
            route->kroute.u.msi.address_hi == (msg.address >> 32) &&
1123
            route->kroute.u.msi.data == le32_to_cpu(msg.data)) {
1124
            return route;
1125
        }
1126
    }
1127
    return NULL;
1128
}
1129

    
1130
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
1131
{
1132
    struct kvm_msi msi;
1133
    KVMMSIRoute *route;
1134

    
1135
    if (s->direct_msi) {
1136
        msi.address_lo = (uint32_t)msg.address;
1137
        msi.address_hi = msg.address >> 32;
1138
        msi.data = le32_to_cpu(msg.data);
1139
        msi.flags = 0;
1140
        memset(msi.pad, 0, sizeof(msi.pad));
1141

    
1142
        return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi);
1143
    }
1144

    
1145
    route = kvm_lookup_msi_route(s, msg);
1146
    if (!route) {
1147
        int virq;
1148

    
1149
        virq = kvm_irqchip_get_virq(s);
1150
        if (virq < 0) {
1151
            return virq;
1152
        }
1153

    
1154
        route = g_malloc0(sizeof(KVMMSIRoute));
1155
        route->kroute.gsi = virq;
1156
        route->kroute.type = KVM_IRQ_ROUTING_MSI;
1157
        route->kroute.flags = 0;
1158
        route->kroute.u.msi.address_lo = (uint32_t)msg.address;
1159
        route->kroute.u.msi.address_hi = msg.address >> 32;
1160
        route->kroute.u.msi.data = le32_to_cpu(msg.data);
1161

    
1162
        kvm_add_routing_entry(s, &route->kroute);
1163
        kvm_irqchip_commit_routes(s);
1164

    
1165
        QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route,
1166
                           entry);
1167
    }
1168

    
1169
    assert(route->kroute.type == KVM_IRQ_ROUTING_MSI);
1170

    
1171
    return kvm_set_irq(s, route->kroute.gsi, 1);
1172
}
1173

    
1174
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
1175
{
1176
    struct kvm_irq_routing_entry kroute = {};
1177
    int virq;
1178

    
1179
    if (kvm_gsi_direct_mapping()) {
1180
        return msg.data & 0xffff;
1181
    }
1182

    
1183
    if (!kvm_gsi_routing_enabled()) {
1184
        return -ENOSYS;
1185
    }
1186

    
1187
    virq = kvm_irqchip_get_virq(s);
1188
    if (virq < 0) {
1189
        return virq;
1190
    }
1191

    
1192
    kroute.gsi = virq;
1193
    kroute.type = KVM_IRQ_ROUTING_MSI;
1194
    kroute.flags = 0;
1195
    kroute.u.msi.address_lo = (uint32_t)msg.address;
1196
    kroute.u.msi.address_hi = msg.address >> 32;
1197
    kroute.u.msi.data = le32_to_cpu(msg.data);
1198

    
1199
    kvm_add_routing_entry(s, &kroute);
1200
    kvm_irqchip_commit_routes(s);
1201

    
1202
    return virq;
1203
}
1204

    
1205
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
1206
{
1207
    struct kvm_irq_routing_entry kroute = {};
1208

    
1209
    if (kvm_gsi_direct_mapping()) {
1210
        return 0;
1211
    }
1212

    
1213
    if (!kvm_irqchip_in_kernel()) {
1214
        return -ENOSYS;
1215
    }
1216

    
1217
    kroute.gsi = virq;
1218
    kroute.type = KVM_IRQ_ROUTING_MSI;
1219
    kroute.flags = 0;
1220
    kroute.u.msi.address_lo = (uint32_t)msg.address;
1221
    kroute.u.msi.address_hi = msg.address >> 32;
1222
    kroute.u.msi.data = le32_to_cpu(msg.data);
1223

    
1224
    return kvm_update_routing_entry(s, &kroute);
1225
}
1226

    
1227
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
1228
                                    bool assign)
1229
{
1230
    struct kvm_irqfd irqfd = {
1231
        .fd = fd,
1232
        .gsi = virq,
1233
        .flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
1234
    };
1235

    
1236
    if (rfd != -1) {
1237
        irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE;
1238
        irqfd.resamplefd = rfd;
1239
    }
1240

    
1241
    if (!kvm_irqfds_enabled()) {
1242
        return -ENOSYS;
1243
    }
1244

    
1245
    return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd);
1246
}
1247

    
1248
#else /* !KVM_CAP_IRQ_ROUTING */
1249

    
1250
void kvm_init_irq_routing(KVMState *s)
1251
{
1252
}
1253

    
1254
void kvm_irqchip_release_virq(KVMState *s, int virq)
1255
{
1256
}
1257

    
1258
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
1259
{
1260
    abort();
1261
}
1262

    
1263
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
1264
{
1265
    return -ENOSYS;
1266
}
1267

    
1268
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
1269
{
1270
    abort();
1271
}
1272

    
1273
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
1274
{
1275
    return -ENOSYS;
1276
}
1277
#endif /* !KVM_CAP_IRQ_ROUTING */
1278

    
1279
int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
1280
                                   EventNotifier *rn, int virq)
1281
{
1282
    return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n),
1283
           rn ? event_notifier_get_fd(rn) : -1, virq, true);
1284
}
1285

    
1286
int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
1287
{
1288
    return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq,
1289
           false);
1290
}
1291

    
1292
static int kvm_irqchip_create(KVMState *s)
1293
{
1294
    int ret;
1295

    
1296
    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "kernel_irqchip", true) ||
1297
        !kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
1298
        return 0;
1299
    }
1300

    
1301
    /* First probe and see if there's an arch-specific hook to create the
1302
     * in-kernel irqchip for us */
1303
    ret = kvm_arch_irqchip_create(s);
1304
    if (ret < 0) {
1305
        return ret;
1306
    } else if (ret == 0) {
1307
        ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
1308
        if (ret < 0) {
1309
            fprintf(stderr, "Create kernel irqchip failed\n");
1310
            return ret;
1311
        }
1312
    }
1313

    
1314
    kvm_kernel_irqchip = true;
1315
    /* If we have an in-kernel IRQ chip then we must have asynchronous
1316
     * interrupt delivery (though the reverse is not necessarily true)
1317
     */
1318
    kvm_async_interrupts_allowed = true;
1319
    kvm_halt_in_kernel_allowed = true;
1320

    
1321
    kvm_init_irq_routing(s);
1322

    
1323
    return 0;
1324
}
1325

    
1326
/* Find number of supported CPUs using the recommended
1327
 * procedure from the kernel API documentation to cope with
1328
 * older kernels that may be missing capabilities.
1329
 */
1330
static int kvm_recommended_vcpus(KVMState *s)
1331
{
1332
    int ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS);
1333
    return (ret) ? ret : 4;
1334
}
1335

    
1336
static int kvm_max_vcpus(KVMState *s)
1337
{
1338
    int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
1339
    return (ret) ? ret : kvm_recommended_vcpus(s);
1340
}
1341

    
1342
int kvm_init(void)
1343
{
1344
    static const char upgrade_note[] =
1345
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
1346
        "(see http://sourceforge.net/projects/kvm).\n";
1347
    struct {
1348
        const char *name;
1349
        int num;
1350
    } num_cpus[] = {
1351
        { "SMP",          smp_cpus },
1352
        { "hotpluggable", max_cpus },
1353
        { NULL, }
1354
    }, *nc = num_cpus;
1355
    int soft_vcpus_limit, hard_vcpus_limit;
1356
    KVMState *s;
1357
    const KVMCapabilityInfo *missing_cap;
1358
    int ret;
1359
    int i;
1360

    
1361
    s = g_malloc0(sizeof(KVMState));
1362

    
1363
    /*
1364
     * On systems where the kernel can support different base page
1365
     * sizes, host page size may be different from TARGET_PAGE_SIZE,
1366
     * even with KVM.  TARGET_PAGE_SIZE is assumed to be the minimum
1367
     * page size for the system though.
1368
     */
1369
    assert(TARGET_PAGE_SIZE <= getpagesize());
1370
    page_size_init();
1371

    
1372
#ifdef KVM_CAP_SET_GUEST_DEBUG
1373
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
1374
#endif
1375
    s->vmfd = -1;
1376
    s->fd = qemu_open("/dev/kvm", O_RDWR);
1377
    if (s->fd == -1) {
1378
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
1379
        ret = -errno;
1380
        goto err;
1381
    }
1382

    
1383
    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
1384
    if (ret < KVM_API_VERSION) {
1385
        if (ret > 0) {
1386
            ret = -EINVAL;
1387
        }
1388
        fprintf(stderr, "kvm version too old\n");
1389
        goto err;
1390
    }
1391

    
1392
    if (ret > KVM_API_VERSION) {
1393
        ret = -EINVAL;
1394
        fprintf(stderr, "kvm version not supported\n");
1395
        goto err;
1396
    }
1397

    
1398
    s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
1399

    
1400
    /* If unspecified, use the default value */
1401
    if (!s->nr_slots) {
1402
        s->nr_slots = 32;
1403
    }
1404

    
1405
    s->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot));
1406

    
1407
    for (i = 0; i < s->nr_slots; i++) {
1408
        s->slots[i].slot = i;
1409
    }
1410

    
1411
    /* check the vcpu limits */
1412
    soft_vcpus_limit = kvm_recommended_vcpus(s);
1413
    hard_vcpus_limit = kvm_max_vcpus(s);
1414

    
1415
    while (nc->name) {
1416
        if (nc->num > soft_vcpus_limit) {
1417
            fprintf(stderr,
1418
                    "Warning: Number of %s cpus requested (%d) exceeds "
1419
                    "the recommended cpus supported by KVM (%d)\n",
1420
                    nc->name, nc->num, soft_vcpus_limit);
1421

    
1422
            if (nc->num > hard_vcpus_limit) {
1423
                ret = -EINVAL;
1424
                fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
1425
                        "the maximum cpus supported by KVM (%d)\n",
1426
                        nc->name, nc->num, hard_vcpus_limit);
1427
                goto err;
1428
            }
1429
        }
1430
        nc++;
1431
    }
1432

    
1433
    do {
1434
        ret = kvm_ioctl(s, KVM_CREATE_VM, 0);
1435
    } while (ret == -EINTR);
1436

    
1437
    if (ret < 0) {
1438
        fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -ret,
1439
                strerror(-ret));
1440

    
1441
#ifdef TARGET_S390X
1442
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
1443
                        "your host kernel command line\n");
1444
#endif
1445
        goto err;
1446
    }
1447

    
1448
    s->vmfd = ret;
1449
    missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
1450
    if (!missing_cap) {
1451
        missing_cap =
1452
            kvm_check_extension_list(s, kvm_arch_required_capabilities);
1453
    }
1454
    if (missing_cap) {
1455
        ret = -EINVAL;
1456
        fprintf(stderr, "kvm does not support %s\n%s",
1457
                missing_cap->name, upgrade_note);
1458
        goto err;
1459
    }
1460

    
1461
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
1462

    
1463
    s->broken_set_mem_region = 1;
1464
    ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
1465
    if (ret > 0) {
1466
        s->broken_set_mem_region = 0;
1467
    }
1468

    
1469
#ifdef KVM_CAP_VCPU_EVENTS
1470
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
1471
#endif
1472

    
1473
    s->robust_singlestep =
1474
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
1475

    
1476
#ifdef KVM_CAP_DEBUGREGS
1477
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
1478
#endif
1479

    
1480
#ifdef KVM_CAP_XSAVE
1481
    s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
1482
#endif
1483

    
1484
#ifdef KVM_CAP_XCRS
1485
    s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
1486
#endif
1487

    
1488
#ifdef KVM_CAP_PIT_STATE2
1489
    s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
1490
#endif
1491

    
1492
#ifdef KVM_CAP_IRQ_ROUTING
1493
    s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
1494
#endif
1495

    
1496
    s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);
1497

    
1498
    s->irq_set_ioctl = KVM_IRQ_LINE;
1499
    if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
1500
        s->irq_set_ioctl = KVM_IRQ_LINE_STATUS;
1501
    }
1502

    
1503
#ifdef KVM_CAP_READONLY_MEM
1504
    kvm_readonly_mem_allowed =
1505
        (kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0);
1506
#endif
1507

    
1508
    ret = kvm_arch_init(s);
1509
    if (ret < 0) {
1510
        goto err;
1511
    }
1512

    
1513
    ret = kvm_irqchip_create(s);
1514
    if (ret < 0) {
1515
        goto err;
1516
    }
1517

    
1518
    kvm_state = s;
1519
    memory_listener_register(&kvm_memory_listener, &address_space_memory);
1520
    memory_listener_register(&kvm_io_listener, &address_space_io);
1521

    
1522
    s->many_ioeventfds = kvm_check_many_ioeventfds();
1523

    
1524
    cpu_interrupt_handler = kvm_handle_interrupt;
1525

    
1526
    return 0;
1527

    
1528
err:
1529
    if (s->vmfd >= 0) {
1530
        close(s->vmfd);
1531
    }
1532
    if (s->fd != -1) {
1533
        close(s->fd);
1534
    }
1535
    g_free(s->slots);
1536
    g_free(s);
1537

    
1538
    return ret;
1539
}
1540

    
1541
static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
1542
                          uint32_t count)
1543
{
1544
    int i;
1545
    uint8_t *ptr = data;
1546

    
1547
    for (i = 0; i < count; i++) {
1548
        address_space_rw(&address_space_io, port, ptr, size,
1549
                         direction == KVM_EXIT_IO_OUT);
1550
        ptr += size;
1551
    }
1552
}
1553

    
1554
static int kvm_handle_internal_error(CPUState *cpu, struct kvm_run *run)
1555
{
1556
    fprintf(stderr, "KVM internal error. Suberror: %d\n",
1557
            run->internal.suberror);
1558

    
1559
    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
1560
        int i;
1561

    
1562
        for (i = 0; i < run->internal.ndata; ++i) {
1563
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
1564
                    i, (uint64_t)run->internal.data[i]);
1565
        }
1566
    }
1567
    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
1568
        fprintf(stderr, "emulation failure\n");
1569
        if (!kvm_arch_stop_on_emulation_error(cpu)) {
1570
            cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
1571
            return EXCP_INTERRUPT;
1572
        }
1573
    }
1574
    /* FIXME: Should trigger a qmp message to let management know
1575
     * something went wrong.
1576
     */
1577
    return -1;
1578
}
1579

    
1580
void kvm_flush_coalesced_mmio_buffer(void)
1581
{
1582
    KVMState *s = kvm_state;
1583

    
1584
    if (s->coalesced_flush_in_progress) {
1585
        return;
1586
    }
1587

    
1588
    s->coalesced_flush_in_progress = true;
1589

    
1590
    if (s->coalesced_mmio_ring) {
1591
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
1592
        while (ring->first != ring->last) {
1593
            struct kvm_coalesced_mmio *ent;
1594

    
1595
            ent = &ring->coalesced_mmio[ring->first];
1596

    
1597
            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
1598
            smp_wmb();
1599
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
1600
        }
1601
    }
1602

    
1603
    s->coalesced_flush_in_progress = false;
1604
}
1605

    
1606
static void do_kvm_cpu_synchronize_state(void *arg)
1607
{
1608
    CPUState *cpu = arg;
1609

    
1610
    if (!cpu->kvm_vcpu_dirty) {
1611
        kvm_arch_get_registers(cpu);
1612
        cpu->kvm_vcpu_dirty = true;
1613
    }
1614
}
1615

    
1616
void kvm_cpu_synchronize_state(CPUState *cpu)
1617
{
1618
    if (!cpu->kvm_vcpu_dirty) {
1619
        run_on_cpu(cpu, do_kvm_cpu_synchronize_state, cpu);
1620
    }
1621
}
1622

    
1623
void kvm_cpu_synchronize_post_reset(CPUState *cpu)
1624
{
1625
    kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
1626
    cpu->kvm_vcpu_dirty = false;
1627
}
1628

    
1629
void kvm_cpu_synchronize_post_init(CPUState *cpu)
1630
{
1631
    kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
1632
    cpu->kvm_vcpu_dirty = false;
1633
}
1634

    
1635
int kvm_cpu_exec(CPUState *cpu)
1636
{
1637
    struct kvm_run *run = cpu->kvm_run;
1638
    int ret, run_ret;
1639

    
1640
    DPRINTF("kvm_cpu_exec()\n");
1641

    
1642
    if (kvm_arch_process_async_events(cpu)) {
1643
        cpu->exit_request = 0;
1644
        return EXCP_HLT;
1645
    }
1646

    
1647
    do {
1648
        if (cpu->kvm_vcpu_dirty) {
1649
            kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
1650
            cpu->kvm_vcpu_dirty = false;
1651
        }
1652

    
1653
        kvm_arch_pre_run(cpu, run);
1654
        if (cpu->exit_request) {
1655
            DPRINTF("interrupt exit requested\n");
1656
            /*
1657
             * KVM requires us to reenter the kernel after IO exits to complete
1658
             * instruction emulation. This self-signal will ensure that we
1659
             * leave ASAP again.
1660
             */
1661
            qemu_cpu_kick_self();
1662
        }
1663
        qemu_mutex_unlock_iothread();
1664

    
1665
        run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
1666

    
1667
        qemu_mutex_lock_iothread();
1668
        kvm_arch_post_run(cpu, run);
1669

    
1670
        if (run_ret < 0) {
1671
            if (run_ret == -EINTR || run_ret == -EAGAIN) {
1672
                DPRINTF("io window exit\n");
1673
                ret = EXCP_INTERRUPT;
1674
                break;
1675
            }
1676
            fprintf(stderr, "error: kvm run failed %s\n",
1677
                    strerror(-run_ret));
1678
            abort();
1679
        }
1680

    
1681
        trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
1682
        switch (run->exit_reason) {
1683
        case KVM_EXIT_IO:
1684
            DPRINTF("handle_io\n");
1685
            kvm_handle_io(run->io.port,
1686
                          (uint8_t *)run + run->io.data_offset,
1687
                          run->io.direction,
1688
                          run->io.size,
1689
                          run->io.count);
1690
            ret = 0;
1691
            break;
1692
        case KVM_EXIT_MMIO:
1693
            DPRINTF("handle_mmio\n");
1694
            cpu_physical_memory_rw(run->mmio.phys_addr,
1695
                                   run->mmio.data,
1696
                                   run->mmio.len,
1697
                                   run->mmio.is_write);
1698
            ret = 0;
1699
            break;
1700
        case KVM_EXIT_IRQ_WINDOW_OPEN:
1701
            DPRINTF("irq_window_open\n");
1702
            ret = EXCP_INTERRUPT;
1703
            break;
1704
        case KVM_EXIT_SHUTDOWN:
1705
            DPRINTF("shutdown\n");
1706
            qemu_system_reset_request();
1707
            ret = EXCP_INTERRUPT;
1708
            break;
1709
        case KVM_EXIT_UNKNOWN:
1710
            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
1711
                    (uint64_t)run->hw.hardware_exit_reason);
1712
            ret = -1;
1713
            break;
1714
        case KVM_EXIT_INTERNAL_ERROR:
1715
            ret = kvm_handle_internal_error(cpu, run);
1716
            break;
1717
        default:
1718
            DPRINTF("kvm_arch_handle_exit\n");
1719
            ret = kvm_arch_handle_exit(cpu, run);
1720
            break;
1721
        }
1722
    } while (ret == 0);
1723

    
1724
    if (ret < 0) {
1725
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
1726
        vm_stop(RUN_STATE_INTERNAL_ERROR);
1727
    }
1728

    
1729
    cpu->exit_request = 0;
1730
    return ret;
1731
}
1732

    
1733
int kvm_ioctl(KVMState *s, int type, ...)
1734
{
1735
    int ret;
1736
    void *arg;
1737
    va_list ap;
1738

    
1739
    va_start(ap, type);
1740
    arg = va_arg(ap, void *);
1741
    va_end(ap);
1742

    
1743
    trace_kvm_ioctl(type, arg);
1744
    ret = ioctl(s->fd, type, arg);
1745
    if (ret == -1) {
1746
        ret = -errno;
1747
    }
1748
    return ret;
1749
}
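
/* kvm_ioctl() above and the vm/vcpu/device variants below share one
 * convention (sketch): the raw ioctl's -1/errno result is converted into
 * a negative errno return value, so callers just test for ret < 0, e.g.
 *
 *     ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
 *     if (ret < 0) {
 *         // -ret is the errno code
 *     }
 */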
1750

    
1751
int kvm_vm_ioctl(KVMState *s, int type, ...)
1752
{
1753
    int ret;
1754
    void *arg;
1755
    va_list ap;
1756

    
1757
    va_start(ap, type);
1758
    arg = va_arg(ap, void *);
1759
    va_end(ap);
1760

    
1761
    trace_kvm_vm_ioctl(type, arg);
1762
    ret = ioctl(s->vmfd, type, arg);
1763
    if (ret == -1) {
1764
        ret = -errno;
1765
    }
1766
    return ret;
1767
}
1768

    
1769
int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
    ret = ioctl(cpu->kvm_fd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}

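/*
 * Like kvm_ioctl(), but issued against a device fd obtained from
 * KVM_CREATE_DEVICE (see kvm_create_device() below).
 */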
int kvm_device_ioctl(int fd, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    trace_kvm_device_ioctl(fd, type, arg);
    ret = ioctl(fd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}

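/*
 * The kvm_has_*() helpers below report optional KVM features, either by
 * querying KVM_CHECK_EXTENSION directly or by returning values cached in
 * KVMState when the VM was initialized.
 */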
int kvm_has_sync_mmu(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

int kvm_has_xsave(void)
{
    return kvm_state->xsave;
}

int kvm_has_xcrs(void)
{
    return kvm_state->xcrs;
}

int kvm_has_pit_state2(void)
{
    return kvm_state->pit_state2;
}

int kvm_has_many_ioeventfds(void)
{
    if (!kvm_enabled()) {
        return 0;
    }
    return kvm_state->many_ioeventfds;
}

int kvm_has_gsi_routing(void)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#else
    return false;
#endif
}

int kvm_has_intx_set_mask(void)
{
    return kvm_state->intx_set_mask;
}

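/*
 * Prepare a newly allocated block of guest RAM.  Under Valgrind the range
 * is marked as defined to keep memcheck quiet.  Without KVM_CAP_SYNC_MMU
 * the range additionally needs MADV_DONTFORK; if that fails we cannot run
 * safely and exit.
 */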
void kvm_setup_guest_memory(void *start, size_t size)
{
#ifdef CONFIG_VALGRIND_H
    VALGRIND_MAKE_MEM_DEFINED(start, size);
#endif
    if (!kvm_has_sync_mmu()) {
        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);

        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr,
                    "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
            exit(1);
        }
    }
}

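/*
 * Guest debugging (gdbstub) support.  Software breakpoints are tracked in
 * a per-KVMState list keyed by guest PC; hardware breakpoints are handled
 * by the per-target kvm_arch_*_hw_breakpoint() hooks.  When the host
 * kernel lacks KVM_CAP_SET_GUEST_DEBUG, the stubs after the #else below
 * simply return -EINVAL.
 */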
#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc) {
            return bp;
        }
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *cpu)
{
    return !QTAILQ_EMPTY(&cpu->kvm_state->kvm_sw_breakpoints);
}

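/*
 * KVM_SET_GUEST_DEBUG is a vCPU ioctl, so kvm_update_guest_debug() bundles
 * its arguments into this struct and hands kvm_invoke_set_guest_debug()
 * to run_on_cpu(), ensuring the ioctl is issued from the target vCPU's
 * own thread.
 */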
struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    CPUState *cpu;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;

    dbg_data->err = kvm_vcpu_ioctl(dbg_data->cpu, KVM_SET_GUEST_DEBUG,
                                   &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = reinject_trap;

    if (cpu->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(cpu, &data.dbg);
    data.cpu = cpu;

    run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

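/*
 * Insert a gdb breakpoint.  Software breakpoints are reference counted via
 * use_count so that repeated requests for the same address are patched in
 * only once; hardware breakpoints go straight to the arch hook.  All vCPUs
 * are then told to refresh their debug state.
 */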
int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(cpu, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = g_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp) {
            return -ENOMEM;
        }

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(cpu, bp);
        if (err) {
            g_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    CPU_FOREACH(cpu) {
        err = kvm_update_guest_debug(cpu, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

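/*
 * Remove a gdb breakpoint.  A software breakpoint is only dropped when its
 * use_count reaches zero; afterwards every vCPU refreshes its debug state.
 */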
int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(cpu, addr);
        if (!bp) {
            return -ENOENT;
        }

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(cpu, bp);
        if (err) {
            return err;
        }

        QTAILQ_REMOVE(&cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    CPU_FOREACH(cpu) {
        err = kvm_update_guest_debug(cpu, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

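/*
 * Drop all breakpoints, e.g. when the gdb client detaches.  Removing a
 * software breakpoint can fail on a vCPU that does not currently see the
 * patched address, hence the retry loop over all CPUs below.
 */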
void kvm_remove_all_breakpoints(CPUState *cpu)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = cpu->kvm_state;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(cpu, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            CPU_FOREACH(cpu) {
                if (kvm_arch_remove_sw_breakpoint(cpu, bp) == 0) {
                    break;
                }
            }
        }
        QTAILQ_REMOVE(&s->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    }
    kvm_arch_remove_all_hw_breakpoints();

    CPU_FOREACH(cpu) {
        kvm_update_guest_debug(cpu, 0);
    }
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *cpu)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */

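/*
 * Install the signal mask that the kernel applies atomically while this
 * vCPU executes KVM_RUN (KVM_SET_SIGNAL_MASK).  The hard-coded length of 8
 * matches the size of the kernel's sigset_t; a NULL sigset is passed
 * through unchanged.
 */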
int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL);
    }

    sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, sigmask);
    g_free(sigmask);

    return r;
}

int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return kvm_arch_on_sigbus_vcpu(cpu, code, addr);
}

int kvm_on_sigbus(int code, void *addr)
{
    return kvm_arch_on_sigbus(code, addr);
}

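/*
 * Create an in-kernel device of the given type via KVM_CREATE_DEVICE, or,
 * when test is true, merely probe whether the device type is supported.
 * Returns the new device fd on success, 0 for a successful probe, or a
 * negative errno.
 */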
int kvm_create_device(KVMState *s, uint64_t type, bool test)
{
    int ret;
    struct kvm_create_device create_dev;

    create_dev.type = type;
    create_dev.fd = -1;
    create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;

    if (!kvm_check_extension(s, KVM_CAP_DEVICE_CTRL)) {
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(s, KVM_CREATE_DEVICE, &create_dev);
    if (ret) {
        return ret;
    }

    return test ? 0 : create_dev.fd;
}