/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-barrier.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"
#include "bswap.h"
#include "memory.h"
#include "exec-memory.h"

/* This check must be after config-host.h is included */
#ifdef CONFIG_EVENTFD
#include <sys/eventfd.h>
#endif

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    void *ram;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
    bool coalesced_flush_in_progress;
    int broken_set_mem_region;
    int migration_log;
    int vcpu_events;
    int robust_singlestep;
    int debugregs;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int pit_in_kernel;
    int pit_state2;
    int xsave, xcrs;
    int many_ioeventfds;
    /* The man page (and posix) say ioctl numbers are signed int, but
     * they're not.  Linux, glibc and *BSD all treat ioctl numbers as
     * unsigned, and treating them as signed here can break things */
    unsigned irqchip_inject_ioctl;
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *irq_routes;
    int nr_allocated_irq_routes;
    uint32_t *used_gsi_bitmap;
    unsigned int max_gsi;
#endif
};
93

    
94
KVMState *kvm_state;
95
bool kvm_kernel_irqchip;
96

    
97
static const KVMCapabilityInfo kvm_required_capabilites[] = {
98
    KVM_CAP_INFO(USER_MEMORY),
99
    KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
100
    KVM_CAP_LAST_INFO
101
};
102

    
103
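/* Return the first unused memory slot (memory_size == 0); abort if all
 * slots are taken. */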
static KVMSlot *kvm_alloc_slot(KVMState *s)
104
{
105
    int i;
106

    
107
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
108
        if (s->slots[i].memory_size == 0) {
109
            return &s->slots[i];
110
        }
111
    }
112

    
113
    fprintf(stderr, "%s: no free slot available\n", __func__);
114
    abort();
115
}
116

    
117
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
118
                                         target_phys_addr_t start_addr,
119
                                         target_phys_addr_t end_addr)
120
{
121
    int i;
122

    
123
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
124
        KVMSlot *mem = &s->slots[i];
125

    
126
        if (start_addr == mem->start_addr &&
127
            end_addr == mem->start_addr + mem->memory_size) {
128
            return mem;
129
        }
130
    }
131

    
132
    return NULL;
133
}
134

    
135
/*
136
 * Find overlapping slot with lowest start address
137
 */
138
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
139
                                            target_phys_addr_t start_addr,
140
                                            target_phys_addr_t end_addr)
141
{
142
    KVMSlot *found = NULL;
143
    int i;
144

    
145
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
146
        KVMSlot *mem = &s->slots[i];
147

    
148
        if (mem->memory_size == 0 ||
149
            (found && found->start_addr < mem->start_addr)) {
150
            continue;
151
        }
152

    
153
        if (end_addr > mem->start_addr &&
154
            start_addr < mem->start_addr + mem->memory_size) {
155
            found = mem;
156
        }
157
    }
158

    
159
    return found;
160
}
161

    
162
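/* Translate a host userspace pointer back to a guest physical address by
 * scanning the registered slots.  Returns 1 and fills *phys_addr on
 * success, 0 if the pointer is not covered by any slot. */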
int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
163
                                       target_phys_addr_t *phys_addr)
164
{
165
    int i;
166

    
167
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
168
        KVMSlot *mem = &s->slots[i];
169

    
170
        if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
171
            *phys_addr = mem->start_addr + (ram - mem->ram);
172
            return 1;
173
        }
174
    }
175

    
176
    return 0;
177
}
178

    
179
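/* Push one slot's layout to the kernel via KVM_SET_USER_MEMORY_REGION,
 * forcing dirty logging on while migration logging is active. */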
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
180
{
181
    struct kvm_userspace_memory_region mem;
182

    
183
    mem.slot = slot->slot;
184
    mem.guest_phys_addr = slot->start_addr;
185
    mem.memory_size = slot->memory_size;
186
    mem.userspace_addr = (unsigned long)slot->ram;
187
    mem.flags = slot->flags;
188
    if (s->migration_log) {
189
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
190
    }
191
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
192
}
193

    
194
static void kvm_reset_vcpu(void *opaque)
195
{
196
    CPUArchState *env = opaque;
197

    
198
    kvm_arch_reset_vcpu(env);
199
}
200

    
201
int kvm_pit_in_kernel(void)
202
{
203
    return kvm_state->pit_in_kernel;
204
}
205

    
206
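/* Create the KVM vcpu for @env, map its shared kvm_run area and run the
 * per-arch vcpu init; also registers the vcpu reset handler. */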
int kvm_init_vcpu(CPUArchState *env)
207
{
208
    KVMState *s = kvm_state;
209
    long mmap_size;
210
    int ret;
211

    
212
    DPRINTF("kvm_init_vcpu\n");
213

    
214
    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
215
    if (ret < 0) {
216
        DPRINTF("kvm_create_vcpu failed\n");
217
        goto err;
218
    }
219

    
220
    env->kvm_fd = ret;
221
    env->kvm_state = s;
222
    env->kvm_vcpu_dirty = 1;
223

    
224
    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
225
    if (mmap_size < 0) {
226
        ret = mmap_size;
227
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
228
        goto err;
229
    }
230

    
231
    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
232
                        env->kvm_fd, 0);
233
    if (env->kvm_run == MAP_FAILED) {
234
        ret = -errno;
235
        DPRINTF("mmap'ing vcpu state failed\n");
236
        goto err;
237
    }
238

    
239
    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
240
        s->coalesced_mmio_ring =
241
            (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE;
242
    }
243

    
244
    ret = kvm_arch_init_vcpu(env);
245
    if (ret == 0) {
246
        qemu_register_reset(kvm_reset_vcpu, env);
247
        kvm_arch_reset_vcpu(env);
248
    }
249
err:
250
    return ret;
251
}
252

    
253
/*
254
 * dirty pages logging control
255
 */
256

    
257
static int kvm_mem_flags(KVMState *s, bool log_dirty)
258
{
259
    return log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0;
260
}
261

    
262
static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
263
{
264
    KVMState *s = kvm_state;
265
    int flags, mask = KVM_MEM_LOG_DIRTY_PAGES;
266
    int old_flags;
267

    
268
    old_flags = mem->flags;
269

    
270
    flags = (mem->flags & ~mask) | kvm_mem_flags(s, log_dirty);
271
    mem->flags = flags;
272

    
273
    /* If nothing changed effectively, no need to issue ioctl */
274
    if (s->migration_log) {
275
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
276
    }
277

    
278
    if (flags == old_flags) {
279
        return 0;
280
    }
281

    
282
    return kvm_set_user_memory_region(s, mem);
283
}
284

    
285
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
286
                                      ram_addr_t size, bool log_dirty)
287
{
288
    KVMState *s = kvm_state;
289
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
290

    
291
    if (mem == NULL)  {
292
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
293
                TARGET_FMT_plx "\n", __func__, phys_addr,
294
                (target_phys_addr_t)(phys_addr + size - 1));
295
        return -EINVAL;
296
    }
297
    return kvm_slot_dirty_pages_log_change(mem, log_dirty);
298
}
299

    
300
static void kvm_log_start(MemoryListener *listener,
301
                          MemoryRegionSection *section)
302
{
303
    int r;
304

    
305
    r = kvm_dirty_pages_log_change(section->offset_within_address_space,
306
                                   section->size, true);
307
    if (r < 0) {
308
        abort();
309
    }
310
}
311

    
312
static void kvm_log_stop(MemoryListener *listener,
313
                          MemoryRegionSection *section)
314
{
315
    int r;
316

    
317
    r = kvm_dirty_pages_log_change(section->offset_within_address_space,
318
                                   section->size, false);
319
    if (r < 0) {
320
        abort();
321
    }
322
}
323

    
324
static int kvm_set_migration_log(int enable)
325
{
326
    KVMState *s = kvm_state;
327
    KVMSlot *mem;
328
    int i, err;
329

    
330
    s->migration_log = enable;
331

    
332
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
333
        mem = &s->slots[i];
334

    
335
        if (!mem->memory_size) {
336
            continue;
337
        }
338
        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
339
            continue;
340
        }
341
        err = kvm_set_user_memory_region(s, mem);
342
        if (err) {
343
            return err;
344
        }
345
    }
346
    return 0;
347
}
348

    
349
/* get kvm's dirty pages bitmap and update qemu's */
350
static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
351
                                         unsigned long *bitmap)
352
{
353
    unsigned int i, j;
354
    unsigned long page_number, c;
355
    target_phys_addr_t addr, addr1;
356
    unsigned int len = ((section->size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
357

    
358
    /*
359
     * bitmap-traveling is faster than memory-traveling (for addr...)
360
     * especially when most of the memory is not dirty.
361
     */
362
    for (i = 0; i < len; i++) {
363
        if (bitmap[i] != 0) {
364
            c = leul_to_cpu(bitmap[i]);
365
            do {
366
                j = ffsl(c) - 1;
367
                c &= ~(1ul << j);
368
                page_number = i * HOST_LONG_BITS + j;
369
                addr1 = page_number * TARGET_PAGE_SIZE;
370
                addr = section->offset_within_region + addr1;
371
                memory_region_set_dirty(section->mr, addr, TARGET_PAGE_SIZE);
372
            } while (c != 0);
373
        }
374
    }
375
    return 0;
376
}
377

    
378
#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
379

    
/**
 * kvm_physical_sync_dirty_bitmap - Grab the dirty bitmap from kernel space
 * and update qemu's dirty bitmap via memory_region_set_dirty(), i.e. every
 * page the kernel reports as dirty is also marked dirty on the QEMU side.
 *
 * @section: memory region section covering the logged region.
 */
389
static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
390
{
391
    KVMState *s = kvm_state;
392
    unsigned long size, allocated_size = 0;
393
    KVMDirtyLog d;
394
    KVMSlot *mem;
395
    int ret = 0;
396
    target_phys_addr_t start_addr = section->offset_within_address_space;
397
    target_phys_addr_t end_addr = start_addr + section->size;
398

    
399
    d.dirty_bitmap = NULL;
400
    while (start_addr < end_addr) {
401
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
402
        if (mem == NULL) {
403
            break;
404
        }
405

    
        /* XXX bad kernel interface alert
         * For the dirty bitmap, the kernel allocates an array of size
         * aligned to bits-per-long.  But when the kernel is 64-bit and
         * userspace is 32-bit, userspace can't align to the same
         * bits-per-long, since sizeof(long) differs between kernel and
         * user space.  Userspace would then provide a buffer that may be
         * 4 bytes smaller than what the kernel uses, resulting in
         * userspace memory corruption (which, in most cases, valgrind
         * cannot detect either).
         * So for now, align to 64 instead of HOST_LONG_BITS here, in the
         * hope that sizeof(long) won't become > 8 any time soon.
         */
418
        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
419
                     /*HOST_LONG_BITS*/ 64) / 8;
420
        if (!d.dirty_bitmap) {
421
            d.dirty_bitmap = g_malloc(size);
422
        } else if (size > allocated_size) {
423
            d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
424
        }
425
        allocated_size = size;
426
        memset(d.dirty_bitmap, 0, allocated_size);
427

    
428
        d.slot = mem->slot;
429

    
430
        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
431
            DPRINTF("ioctl failed %d\n", errno);
432
            ret = -1;
433
            break;
434
        }
435

    
436
        kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
437
        start_addr = mem->start_addr + mem->memory_size;
438
    }
439
    g_free(d.dirty_bitmap);
440

    
441
    return ret;
442
}
443

    
444
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
445
{
446
    int ret = -ENOSYS;
447
    KVMState *s = kvm_state;
448

    
449
    if (s->coalesced_mmio) {
450
        struct kvm_coalesced_mmio_zone zone;
451

    
452
        zone.addr = start;
453
        zone.size = size;
454
        zone.pad = 0;
455

    
456
        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
457
    }
458

    
459
    return ret;
460
}
461

    
462
int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
463
{
464
    int ret = -ENOSYS;
465
    KVMState *s = kvm_state;
466

    
467
    if (s->coalesced_mmio) {
468
        struct kvm_coalesced_mmio_zone zone;
469

    
470
        zone.addr = start;
471
        zone.size = size;
472
        zone.pad = 0;
473

    
474
        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
475
    }
476

    
477
    return ret;
478
}
479

    
480
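/* Query KVM_CHECK_EXTENSION; returns the (non-negative) capability value,
 * or 0 if the extension is unsupported or the query fails. */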
int kvm_check_extension(KVMState *s, unsigned int extension)
481
{
482
    int ret;
483

    
484
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
485
    if (ret < 0) {
486
        ret = 0;
487
    }
488

    
489
    return ret;
490
}
491

    
492
static int kvm_check_many_ioeventfds(void)
493
{
494
    /* Userspace can use ioeventfd for io notification.  This requires a host
495
     * that supports eventfd(2) and an I/O thread; since eventfd does not
496
     * support SIGIO it cannot interrupt the vcpu.
497
     *
498
     * Older kernels have a 6 device limit on the KVM io bus.  Find out so we
499
     * can avoid creating too many ioeventfds.
500
     */
501
#if defined(CONFIG_EVENTFD)
502
    int ioeventfds[7];
503
    int i, ret = 0;
504
    for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) {
505
        ioeventfds[i] = eventfd(0, EFD_CLOEXEC);
506
        if (ioeventfds[i] < 0) {
507
            break;
508
        }
509
        ret = kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, true);
510
        if (ret < 0) {
511
            close(ioeventfds[i]);
512
            break;
513
        }
514
    }
515

    
516
    /* Decide whether many devices are supported or not */
517
    ret = i == ARRAY_SIZE(ioeventfds);
518

    
519
    while (i-- > 0) {
520
        kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, false);
521
        close(ioeventfds[i]);
522
    }
523
    return ret;
524
#else
525
    return 0;
526
#endif
527
}
528

    
529
static const KVMCapabilityInfo *
530
kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
531
{
532
    while (list->name) {
533
        if (!kvm_check_extension(s, list->value)) {
534
            return list;
535
        }
536
        list++;
537
    }
538
    return NULL;
539
}
540

    
541
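/* Register or unregister the RAM covered by @section with KVM.  Overlapping
 * slots are torn down first and, where needed, re-registered as prefix and
 * suffix slots so the kernel's slot layout keeps matching QEMU's memory
 * map. */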
static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
542
{
543
    KVMState *s = kvm_state;
544
    KVMSlot *mem, old;
545
    int err;
546
    MemoryRegion *mr = section->mr;
547
    bool log_dirty = memory_region_is_logging(mr);
548
    target_phys_addr_t start_addr = section->offset_within_address_space;
549
    ram_addr_t size = section->size;
550
    void *ram = NULL;
551
    unsigned delta;
552

    
553
    /* kvm works in page size chunks, but the function may be called
554
       with sub-page size and unaligned start address. */
555
    delta = TARGET_PAGE_ALIGN(size) - size;
556
    if (delta > size) {
557
        return;
558
    }
559
    start_addr += delta;
560
    size -= delta;
561
    size &= TARGET_PAGE_MASK;
562
    if (!size || (start_addr & ~TARGET_PAGE_MASK)) {
563
        return;
564
    }
565

    
566
    if (!memory_region_is_ram(mr)) {
567
        return;
568
    }
569

    
570
    ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;
571

    
572
    while (1) {
573
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
574
        if (!mem) {
575
            break;
576
        }
577

    
578
        if (add && start_addr >= mem->start_addr &&
579
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
580
            (ram - start_addr == mem->ram - mem->start_addr)) {
581
            /* The new slot fits into the existing one and comes with
582
             * identical parameters - update flags and done. */
583
            kvm_slot_dirty_pages_log_change(mem, log_dirty);
584
            return;
585
        }
586

    
587
        old = *mem;
588

    
589
        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
590
            kvm_physical_sync_dirty_bitmap(section);
591
        }
592

    
593
        /* unregister the overlapping slot */
594
        mem->memory_size = 0;
595
        err = kvm_set_user_memory_region(s, mem);
596
        if (err) {
597
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
598
                    __func__, strerror(-err));
599
            abort();
600
        }
601

    
        /* Workaround for older KVM versions: we cannot join slots, not even
         * by unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not, or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
610
        if (s->broken_set_mem_region &&
611
            old.start_addr == start_addr && old.memory_size < size && add) {
612
            mem = kvm_alloc_slot(s);
613
            mem->memory_size = old.memory_size;
614
            mem->start_addr = old.start_addr;
615
            mem->ram = old.ram;
616
            mem->flags = kvm_mem_flags(s, log_dirty);
617

    
618
            err = kvm_set_user_memory_region(s, mem);
619
            if (err) {
620
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
621
                        strerror(-err));
622
                abort();
623
            }
624

    
625
            start_addr += old.memory_size;
626
            ram += old.memory_size;
627
            size -= old.memory_size;
628
            continue;
629
        }
630

    
631
        /* register prefix slot */
632
        if (old.start_addr < start_addr) {
633
            mem = kvm_alloc_slot(s);
634
            mem->memory_size = start_addr - old.start_addr;
635
            mem->start_addr = old.start_addr;
636
            mem->ram = old.ram;
637
            mem->flags = kvm_mem_flags(s, log_dirty);
638

    
639
            err = kvm_set_user_memory_region(s, mem);
640
            if (err) {
641
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
642
                        __func__, strerror(-err));
643
#ifdef TARGET_PPC
644
                fprintf(stderr, "%s: This is probably because your kernel's " \
645
                                "PAGE_SIZE is too big. Please try to use 4k " \
646
                                "PAGE_SIZE!\n", __func__);
647
#endif
648
                abort();
649
            }
650
        }
651

    
652
        /* register suffix slot */
653
        if (old.start_addr + old.memory_size > start_addr + size) {
654
            ram_addr_t size_delta;
655

    
656
            mem = kvm_alloc_slot(s);
657
            mem->start_addr = start_addr + size;
658
            size_delta = mem->start_addr - old.start_addr;
659
            mem->memory_size = old.memory_size - size_delta;
660
            mem->ram = old.ram + size_delta;
661
            mem->flags = kvm_mem_flags(s, log_dirty);
662

    
663
            err = kvm_set_user_memory_region(s, mem);
664
            if (err) {
665
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
666
                        __func__, strerror(-err));
667
                abort();
668
            }
669
        }
670
    }
671

    
672
    /* in case the KVM bug workaround already "consumed" the new slot */
673
    if (!size) {
674
        return;
675
    }
676
    if (!add) {
677
        return;
678
    }
679
    mem = kvm_alloc_slot(s);
680
    mem->memory_size = size;
681
    mem->start_addr = start_addr;
682
    mem->ram = ram;
683
    mem->flags = kvm_mem_flags(s, log_dirty);
684

    
685
    err = kvm_set_user_memory_region(s, mem);
686
    if (err) {
687
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
688
                strerror(-err));
689
        abort();
690
    }
691
}
692

    
693
static void kvm_begin(MemoryListener *listener)
694
{
695
}
696

    
697
static void kvm_commit(MemoryListener *listener)
698
{
699
}
700

    
701
static void kvm_region_add(MemoryListener *listener,
702
                           MemoryRegionSection *section)
703
{
704
    kvm_set_phys_mem(section, true);
705
}
706

    
707
static void kvm_region_del(MemoryListener *listener,
708
                           MemoryRegionSection *section)
709
{
710
    kvm_set_phys_mem(section, false);
711
}
712

    
713
static void kvm_region_nop(MemoryListener *listener,
714
                           MemoryRegionSection *section)
715
{
716
}
717

    
718
static void kvm_log_sync(MemoryListener *listener,
719
                         MemoryRegionSection *section)
720
{
721
    int r;
722

    
723
    r = kvm_physical_sync_dirty_bitmap(section);
724
    if (r < 0) {
725
        abort();
726
    }
727
}
728

    
729
static void kvm_log_global_start(struct MemoryListener *listener)
730
{
731
    int r;
732

    
733
    r = kvm_set_migration_log(1);
734
    assert(r >= 0);
735
}
736

    
737
static void kvm_log_global_stop(struct MemoryListener *listener)
738
{
739
    int r;
740

    
741
    r = kvm_set_migration_log(0);
742
    assert(r >= 0);
743
}
744

    
745
static void kvm_mem_ioeventfd_add(MemoryRegionSection *section,
746
                                  bool match_data, uint64_t data, int fd)
747
{
748
    int r;
749

    
750
    assert(match_data && section->size == 4);
751

    
752
    r = kvm_set_ioeventfd_mmio_long(fd, section->offset_within_address_space,
753
                                    data, true);
754
    if (r < 0) {
755
        abort();
756
    }
757
}
758

    
759
static void kvm_mem_ioeventfd_del(MemoryRegionSection *section,
760
                                  bool match_data, uint64_t data, int fd)
761
{
762
    int r;
763

    
764
    r = kvm_set_ioeventfd_mmio_long(fd, section->offset_within_address_space,
765
                                    data, false);
766
    if (r < 0) {
767
        abort();
768
    }
769
}
770

    
771
static void kvm_io_ioeventfd_add(MemoryRegionSection *section,
772
                                 bool match_data, uint64_t data, int fd)
773
{
774
    int r;
775

    
776
    assert(match_data && section->size == 2);
777

    
778
    r = kvm_set_ioeventfd_pio_word(fd, section->offset_within_address_space,
779
                                   data, true);
780
    if (r < 0) {
781
        abort();
782
    }
783
}
784

    
785
static void kvm_io_ioeventfd_del(MemoryRegionSection *section,
786
                                 bool match_data, uint64_t data, int fd)
787

    
788
{
789
    int r;
790

    
791
    r = kvm_set_ioeventfd_pio_word(fd, section->offset_within_address_space,
792
                                   data, false);
793
    if (r < 0) {
794
        abort();
795
    }
796
}
797

    
798
static void kvm_eventfd_add(MemoryListener *listener,
799
                            MemoryRegionSection *section,
800
                            bool match_data, uint64_t data, int fd)
801
{
802
    if (section->address_space == get_system_memory()) {
803
        kvm_mem_ioeventfd_add(section, match_data, data, fd);
804
    } else {
805
        kvm_io_ioeventfd_add(section, match_data, data, fd);
806
    }
807
}
808

    
809
static void kvm_eventfd_del(MemoryListener *listener,
810
                            MemoryRegionSection *section,
811
                            bool match_data, uint64_t data, int fd)
812
{
813
    if (section->address_space == get_system_memory()) {
814
        kvm_mem_ioeventfd_del(section, match_data, data, fd);
815
    } else {
816
        kvm_io_ioeventfd_del(section, match_data, data, fd);
817
    }
818
}
819

    
820
static MemoryListener kvm_memory_listener = {
821
    .begin = kvm_begin,
822
    .commit = kvm_commit,
823
    .region_add = kvm_region_add,
824
    .region_del = kvm_region_del,
825
    .region_nop = kvm_region_nop,
826
    .log_start = kvm_log_start,
827
    .log_stop = kvm_log_stop,
828
    .log_sync = kvm_log_sync,
829
    .log_global_start = kvm_log_global_start,
830
    .log_global_stop = kvm_log_global_stop,
831
    .eventfd_add = kvm_eventfd_add,
832
    .eventfd_del = kvm_eventfd_del,
833
    .priority = 10,
834
};
835

    
836
static void kvm_handle_interrupt(CPUArchState *env, int mask)
837
{
838
    env->interrupt_request |= mask;
839

    
840
    if (!qemu_cpu_is_self(env)) {
841
        qemu_cpu_kick(env);
842
    }
843
}
844

    
845
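/* Set the level of an in-kernel irqchip input line.  Returns 1, or the
 * line status when KVM_CAP_IRQ_INJECT_STATUS is available. */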
int kvm_irqchip_set_irq(KVMState *s, int irq, int level)
846
{
847
    struct kvm_irq_level event;
848
    int ret;
849

    
850
    assert(kvm_irqchip_in_kernel());
851

    
852
    event.level = level;
853
    event.irq = irq;
854
    ret = kvm_vm_ioctl(s, s->irqchip_inject_ioctl, &event);
855
    if (ret < 0) {
856
        perror("kvm_set_irqchip_line");
857
        abort();
858
    }
859

    
860
    return (s->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
861
}
862

    
863
#ifdef KVM_CAP_IRQ_ROUTING
864
static void set_gsi(KVMState *s, unsigned int gsi)
865
{
866
    assert(gsi < s->max_gsi);
867

    
868
    s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32);
869
}
870

    
871
static void kvm_init_irq_routing(KVMState *s)
872
{
873
    int gsi_count;
874

    
875
    gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING);
876
    if (gsi_count > 0) {
877
        unsigned int gsi_bits, i;
878

    
879
        /* Round up so we can search ints using ffs */
880
        gsi_bits = (gsi_count + 31) / 32;
881
        s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
882
        s->max_gsi = gsi_bits;
883

    
884
        /* Mark any over-allocated bits as already in use */
885
        for (i = gsi_count; i < gsi_bits; i++) {
886
            set_gsi(s, i);
887
        }
888
    }
889

    
890
    s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
891
    s->nr_allocated_irq_routes = 0;
892

    
893
    kvm_arch_init_irq_routing(s);
894
}
895

    
896
static void kvm_add_routing_entry(KVMState *s,
897
                                  struct kvm_irq_routing_entry *entry)
898
{
899
    struct kvm_irq_routing_entry *new;
900
    int n, size;
901

    
902
    if (s->irq_routes->nr == s->nr_allocated_irq_routes) {
903
        n = s->nr_allocated_irq_routes * 2;
904
        if (n < 64) {
905
            n = 64;
906
        }
907
        size = sizeof(struct kvm_irq_routing);
908
        size += n * sizeof(*new);
909
        s->irq_routes = g_realloc(s->irq_routes, size);
910
        s->nr_allocated_irq_routes = n;
911
    }
912
    n = s->irq_routes->nr++;
913
    new = &s->irq_routes->entries[n];
914
    memset(new, 0, sizeof(*new));
915
    new->gsi = entry->gsi;
916
    new->type = entry->type;
917
    new->flags = entry->flags;
918
    new->u = entry->u;
919

    
920
    set_gsi(s, entry->gsi);
921
}
922

    
923
void kvm_irqchip_add_route(KVMState *s, int irq, int irqchip, int pin)
924
{
925
    struct kvm_irq_routing_entry e;
926

    
927
    e.gsi = irq;
928
    e.type = KVM_IRQ_ROUTING_IRQCHIP;
929
    e.flags = 0;
930
    e.u.irqchip.irqchip = irqchip;
931
    e.u.irqchip.pin = pin;
932
    kvm_add_routing_entry(s, &e);
933
}
934

    
935
int kvm_irqchip_commit_routes(KVMState *s)
936
{
937
    s->irq_routes->flags = 0;
938
    return kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
939
}
940

    
941
#else /* !KVM_CAP_IRQ_ROUTING */
942

    
943
static void kvm_init_irq_routing(KVMState *s)
944
{
945
}
946
#endif /* !KVM_CAP_IRQ_ROUTING */
947

    
948
static int kvm_irqchip_create(KVMState *s)
949
{
950
    QemuOptsList *list = qemu_find_opts("machine");
951
    int ret;
952

    
953
    if (QTAILQ_EMPTY(&list->head) ||
954
        !qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
955
                           "kernel_irqchip", false) ||
956
        !kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
957
        return 0;
958
    }
959

    
960
    ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
961
    if (ret < 0) {
962
        fprintf(stderr, "Create kernel irqchip failed\n");
963
        return ret;
964
    }
965

    
966
    s->irqchip_inject_ioctl = KVM_IRQ_LINE;
967
    if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
968
        s->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
969
    }
970
    kvm_kernel_irqchip = true;
971

    
972
    kvm_init_irq_routing(s);
973

    
974
    return 0;
975
}
976

    
977
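/* Open /dev/kvm, create the VM, probe required capabilities and hook up the
 * memory listener and interrupt handler.  Returns 0 on success or a
 * negative errno. */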
int kvm_init(void)
978
{
979
    static const char upgrade_note[] =
980
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
981
        "(see http://sourceforge.net/projects/kvm).\n";
982
    KVMState *s;
983
    const KVMCapabilityInfo *missing_cap;
984
    int ret;
985
    int i;
986

    
987
    s = g_malloc0(sizeof(KVMState));
988

    
989
#ifdef KVM_CAP_SET_GUEST_DEBUG
990
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
991
#endif
992
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
993
        s->slots[i].slot = i;
994
    }
995
    s->vmfd = -1;
996
    s->fd = qemu_open("/dev/kvm", O_RDWR);
997
    if (s->fd == -1) {
998
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
999
        ret = -errno;
1000
        goto err;
1001
    }
1002

    
1003
    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
1004
    if (ret < KVM_API_VERSION) {
1005
        if (ret > 0) {
1006
            ret = -EINVAL;
1007
        }
1008
        fprintf(stderr, "kvm version too old\n");
1009
        goto err;
1010
    }
1011

    
1012
    if (ret > KVM_API_VERSION) {
1013
        ret = -EINVAL;
1014
        fprintf(stderr, "kvm version not supported\n");
1015
        goto err;
1016
    }
1017

    
1018
    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
1019
    if (s->vmfd < 0) {
1020
#ifdef TARGET_S390X
1021
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
1022
                        "your host kernel command line\n");
1023
#endif
1024
        ret = s->vmfd;
1025
        goto err;
1026
    }
1027

    
1028
    missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
1029
    if (!missing_cap) {
1030
        missing_cap =
1031
            kvm_check_extension_list(s, kvm_arch_required_capabilities);
1032
    }
1033
    if (missing_cap) {
1034
        ret = -EINVAL;
1035
        fprintf(stderr, "kvm does not support %s\n%s",
1036
                missing_cap->name, upgrade_note);
1037
        goto err;
1038
    }
1039

    
1040
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
1041

    
1042
    s->broken_set_mem_region = 1;
1043
    ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
1044
    if (ret > 0) {
1045
        s->broken_set_mem_region = 0;
1046
    }
1047

    
1048
#ifdef KVM_CAP_VCPU_EVENTS
1049
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
1050
#endif
1051

    
1052
    s->robust_singlestep =
1053
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
1054

    
1055
#ifdef KVM_CAP_DEBUGREGS
1056
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
1057
#endif
1058

    
1059
#ifdef KVM_CAP_XSAVE
1060
    s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
1061
#endif
1062

    
1063
#ifdef KVM_CAP_XCRS
1064
    s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
1065
#endif
1066

    
1067
#ifdef KVM_CAP_PIT_STATE2
1068
    s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
1069
#endif
1070

    
1071
    ret = kvm_arch_init(s);
1072
    if (ret < 0) {
1073
        goto err;
1074
    }
1075

    
1076
    ret = kvm_irqchip_create(s);
1077
    if (ret < 0) {
1078
        goto err;
1079
    }
1080

    
1081
    kvm_state = s;
1082
    memory_listener_register(&kvm_memory_listener, NULL);
1083

    
1084
    s->many_ioeventfds = kvm_check_many_ioeventfds();
1085

    
1086
    cpu_interrupt_handler = kvm_handle_interrupt;
1087

    
1088
    return 0;
1089

    
1090
err:
1091
    if (s) {
1092
        if (s->vmfd >= 0) {
1093
            close(s->vmfd);
1094
        }
1095
        if (s->fd != -1) {
1096
            close(s->fd);
1097
        }
1098
    }
1099
    g_free(s);
1100

    
1101
    return ret;
1102
}
1103

    
1104
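/* Replay a batch of port I/O accesses described by a KVM_EXIT_IO exit on
 * QEMU's emulated ports. */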
static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
1105
                          uint32_t count)
1106
{
1107
    int i;
1108
    uint8_t *ptr = data;
1109

    
1110
    for (i = 0; i < count; i++) {
1111
        if (direction == KVM_EXIT_IO_IN) {
1112
            switch (size) {
1113
            case 1:
1114
                stb_p(ptr, cpu_inb(port));
1115
                break;
1116
            case 2:
1117
                stw_p(ptr, cpu_inw(port));
1118
                break;
1119
            case 4:
1120
                stl_p(ptr, cpu_inl(port));
1121
                break;
1122
            }
1123
        } else {
1124
            switch (size) {
1125
            case 1:
1126
                cpu_outb(port, ldub_p(ptr));
1127
                break;
1128
            case 2:
1129
                cpu_outw(port, lduw_p(ptr));
1130
                break;
1131
            case 4:
1132
                cpu_outl(port, ldl_p(ptr));
1133
                break;
1134
            }
1135
        }
1136

    
1137
        ptr += size;
1138
    }
1139
}
1140

    
1141
static int kvm_handle_internal_error(CPUArchState *env, struct kvm_run *run)
1142
{
1143
    fprintf(stderr, "KVM internal error.");
1144
    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
1145
        int i;
1146

    
1147
        fprintf(stderr, " Suberror: %d\n", run->internal.suberror);
1148
        for (i = 0; i < run->internal.ndata; ++i) {
1149
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
1150
                    i, (uint64_t)run->internal.data[i]);
1151
        }
1152
    } else {
1153
        fprintf(stderr, "\n");
1154
    }
1155
    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
1156
        fprintf(stderr, "emulation failure\n");
1157
        if (!kvm_arch_stop_on_emulation_error(env)) {
1158
            cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
1159
            return EXCP_INTERRUPT;
1160
        }
1161
    }
1162
    /* FIXME: Should trigger a qmp message to let management know
1163
     * something went wrong.
1164
     */
1165
    return -1;
1166
}
1167

    
1168
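/* Drain the coalesced MMIO ring shared with the kernel, replaying each
 * pending write; recursive invocations are ignored. */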
void kvm_flush_coalesced_mmio_buffer(void)
1169
{
1170
    KVMState *s = kvm_state;
1171

    
1172
    if (s->coalesced_flush_in_progress) {
1173
        return;
1174
    }
1175

    
1176
    s->coalesced_flush_in_progress = true;
1177

    
1178
    if (s->coalesced_mmio_ring) {
1179
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
1180
        while (ring->first != ring->last) {
1181
            struct kvm_coalesced_mmio *ent;
1182

    
1183
            ent = &ring->coalesced_mmio[ring->first];
1184

    
1185
            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
1186
            smp_wmb();
1187
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
1188
        }
1189
    }
1190

    
1191
    s->coalesced_flush_in_progress = false;
1192
}
1193

    
1194
static void do_kvm_cpu_synchronize_state(void *_env)
1195
{
1196
    CPUArchState *env = _env;
1197

    
1198
    if (!env->kvm_vcpu_dirty) {
1199
        kvm_arch_get_registers(env);
1200
        env->kvm_vcpu_dirty = 1;
1201
    }
1202
}
1203

    
1204
void kvm_cpu_synchronize_state(CPUArchState *env)
1205
{
1206
    if (!env->kvm_vcpu_dirty) {
1207
        run_on_cpu(env, do_kvm_cpu_synchronize_state, env);
1208
    }
1209
}
1210

    
1211
void kvm_cpu_synchronize_post_reset(CPUArchState *env)
1212
{
1213
    kvm_arch_put_registers(env, KVM_PUT_RESET_STATE);
1214
    env->kvm_vcpu_dirty = 0;
1215
}
1216

    
1217
void kvm_cpu_synchronize_post_init(CPUArchState *env)
1218
{
1219
    kvm_arch_put_registers(env, KVM_PUT_FULL_STATE);
1220
    env->kvm_vcpu_dirty = 0;
1221
}
1222

    
1223
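/* Main vcpu loop: flush dirty register state, enter the guest via KVM_RUN
 * and dispatch each exit reason until control must return to userspace. */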
int kvm_cpu_exec(CPUArchState *env)
1224
{
1225
    struct kvm_run *run = env->kvm_run;
1226
    int ret, run_ret;
1227

    
1228
    DPRINTF("kvm_cpu_exec()\n");
1229

    
1230
    if (kvm_arch_process_async_events(env)) {
1231
        env->exit_request = 0;
1232
        return EXCP_HLT;
1233
    }
1234

    
1235
    do {
1236
        if (env->kvm_vcpu_dirty) {
1237
            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
1238
            env->kvm_vcpu_dirty = 0;
1239
        }
1240

    
1241
        kvm_arch_pre_run(env, run);
1242
        if (env->exit_request) {
1243
            DPRINTF("interrupt exit requested\n");
1244
            /*
1245
             * KVM requires us to reenter the kernel after IO exits to complete
1246
             * instruction emulation. This self-signal will ensure that we
1247
             * leave ASAP again.
1248
             */
1249
            qemu_cpu_kick_self();
1250
        }
1251
        qemu_mutex_unlock_iothread();
1252

    
1253
        run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
1254

    
1255
        qemu_mutex_lock_iothread();
1256
        kvm_arch_post_run(env, run);
1257

    
1258
        kvm_flush_coalesced_mmio_buffer();
1259

    
1260
        if (run_ret < 0) {
1261
            if (run_ret == -EINTR || run_ret == -EAGAIN) {
1262
                DPRINTF("io window exit\n");
1263
                ret = EXCP_INTERRUPT;
1264
                break;
1265
            }
1266
            fprintf(stderr, "error: kvm run failed %s\n",
1267
                    strerror(-run_ret));
1268
            abort();
1269
        }
1270

    
1271
        switch (run->exit_reason) {
1272
        case KVM_EXIT_IO:
1273
            DPRINTF("handle_io\n");
1274
            kvm_handle_io(run->io.port,
1275
                          (uint8_t *)run + run->io.data_offset,
1276
                          run->io.direction,
1277
                          run->io.size,
1278
                          run->io.count);
1279
            ret = 0;
1280
            break;
1281
        case KVM_EXIT_MMIO:
1282
            DPRINTF("handle_mmio\n");
1283
            cpu_physical_memory_rw(run->mmio.phys_addr,
1284
                                   run->mmio.data,
1285
                                   run->mmio.len,
1286
                                   run->mmio.is_write);
1287
            ret = 0;
1288
            break;
1289
        case KVM_EXIT_IRQ_WINDOW_OPEN:
1290
            DPRINTF("irq_window_open\n");
1291
            ret = EXCP_INTERRUPT;
1292
            break;
1293
        case KVM_EXIT_SHUTDOWN:
1294
            DPRINTF("shutdown\n");
1295
            qemu_system_reset_request();
1296
            ret = EXCP_INTERRUPT;
1297
            break;
1298
        case KVM_EXIT_UNKNOWN:
1299
            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
1300
                    (uint64_t)run->hw.hardware_exit_reason);
1301
            ret = -1;
1302
            break;
1303
        case KVM_EXIT_INTERNAL_ERROR:
1304
            ret = kvm_handle_internal_error(env, run);
1305
            break;
1306
        default:
1307
            DPRINTF("kvm_arch_handle_exit\n");
1308
            ret = kvm_arch_handle_exit(env, run);
1309
            break;
1310
        }
1311
    } while (ret == 0);
1312

    
1313
    if (ret < 0) {
1314
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
1315
        vm_stop(RUN_STATE_INTERNAL_ERROR);
1316
    }
1317

    
1318
    env->exit_request = 0;
1319
    return ret;
1320
}
1321

    
1322
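/* Thin wrappers around ioctl() on the /dev/kvm, VM and vcpu file
 * descriptors; -1/errno is converted into a negative errno return value. */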
int kvm_ioctl(KVMState *s, int type, ...)
1323
{
1324
    int ret;
1325
    void *arg;
1326
    va_list ap;
1327

    
1328
    va_start(ap, type);
1329
    arg = va_arg(ap, void *);
1330
    va_end(ap);
1331

    
1332
    ret = ioctl(s->fd, type, arg);
1333
    if (ret == -1) {
1334
        ret = -errno;
1335
    }
1336
    return ret;
1337
}
1338

    
1339
int kvm_vm_ioctl(KVMState *s, int type, ...)
1340
{
1341
    int ret;
1342
    void *arg;
1343
    va_list ap;
1344

    
1345
    va_start(ap, type);
1346
    arg = va_arg(ap, void *);
1347
    va_end(ap);
1348

    
1349
    ret = ioctl(s->vmfd, type, arg);
1350
    if (ret == -1) {
1351
        ret = -errno;
1352
    }
1353
    return ret;
1354
}
1355

    
1356
int kvm_vcpu_ioctl(CPUArchState *env, int type, ...)
1357
{
1358
    int ret;
1359
    void *arg;
1360
    va_list ap;
1361

    
1362
    va_start(ap, type);
1363
    arg = va_arg(ap, void *);
1364
    va_end(ap);
1365

    
1366
    ret = ioctl(env->kvm_fd, type, arg);
1367
    if (ret == -1) {
1368
        ret = -errno;
1369
    }
1370
    return ret;
1371
}
1372

    
1373
int kvm_has_sync_mmu(void)
1374
{
1375
    return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
1376
}
1377

    
1378
int kvm_has_vcpu_events(void)
1379
{
1380
    return kvm_state->vcpu_events;
1381
}
1382

    
1383
int kvm_has_robust_singlestep(void)
1384
{
1385
    return kvm_state->robust_singlestep;
1386
}
1387

    
1388
int kvm_has_debugregs(void)
1389
{
1390
    return kvm_state->debugregs;
1391
}
1392

    
1393
int kvm_has_xsave(void)
1394
{
1395
    return kvm_state->xsave;
1396
}
1397

    
1398
int kvm_has_xcrs(void)
1399
{
1400
    return kvm_state->xcrs;
1401
}
1402

    
1403
int kvm_has_pit_state2(void)
1404
{
1405
    return kvm_state->pit_state2;
1406
}
1407

    
1408
int kvm_has_many_ioeventfds(void)
1409
{
1410
    if (!kvm_enabled()) {
1411
        return 0;
1412
    }
1413
    return kvm_state->many_ioeventfds;
1414
}
1415

    
1416
int kvm_has_gsi_routing(void)
1417
{
1418
#ifdef KVM_CAP_IRQ_ROUTING
1419
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
1420
#else
1421
    return false;
1422
#endif
1423
}
1424

    
1425
int kvm_allows_irq0_override(void)
1426
{
1427
    return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
1428
}
1429

    
1430
void kvm_setup_guest_memory(void *start, size_t size)
1431
{
1432
    if (!kvm_has_sync_mmu()) {
1433
        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);
1434

    
1435
        if (ret) {
1436
            perror("qemu_madvise");
1437
            fprintf(stderr,
1438
                    "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
1439
            exit(1);
1440
        }
1441
    }
1442
}
1443

    
1444
#ifdef KVM_CAP_SET_GUEST_DEBUG
1445
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUArchState *env,
1446
                                                 target_ulong pc)
1447
{
1448
    struct kvm_sw_breakpoint *bp;
1449

    
1450
    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
1451
        if (bp->pc == pc) {
1452
            return bp;
1453
        }
1454
    }
1455
    return NULL;
1456
}
1457

    
1458
int kvm_sw_breakpoints_active(CPUArchState *env)
1459
{
1460
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
1461
}
1462

    
1463
struct kvm_set_guest_debug_data {
1464
    struct kvm_guest_debug dbg;
1465
    CPUArchState *env;
1466
    int err;
1467
};
1468

    
1469
static void kvm_invoke_set_guest_debug(void *data)
1470
{
1471
    struct kvm_set_guest_debug_data *dbg_data = data;
1472
    CPUArchState *env = dbg_data->env;
1473

    
1474
    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
1475
}
1476

    
1477
int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
1478
{
1479
    struct kvm_set_guest_debug_data data;
1480

    
1481
    data.dbg.control = reinject_trap;
1482

    
1483
    if (env->singlestep_enabled) {
1484
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
1485
    }
1486
    kvm_arch_update_guest_debug(env, &data.dbg);
1487
    data.env = env;
1488

    
1489
    run_on_cpu(env, kvm_invoke_set_guest_debug, &data);
1490
    return data.err;
1491
}
1492

    
1493
int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
1494
                          target_ulong len, int type)
1495
{
1496
    struct kvm_sw_breakpoint *bp;
1497
    CPUArchState *env;
1498
    int err;
1499

    
1500
    if (type == GDB_BREAKPOINT_SW) {
1501
        bp = kvm_find_sw_breakpoint(current_env, addr);
1502
        if (bp) {
1503
            bp->use_count++;
1504
            return 0;
1505
        }
1506

    
1507
        bp = g_malloc(sizeof(struct kvm_sw_breakpoint));
1508
        if (!bp) {
1509
            return -ENOMEM;
1510
        }
1511

    
1512
        bp->pc = addr;
1513
        bp->use_count = 1;
1514
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
1515
        if (err) {
1516
            g_free(bp);
1517
            return err;
1518
        }
1519

    
1520
        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
1521
                          bp, entry);
1522
    } else {
1523
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
1524
        if (err) {
1525
            return err;
1526
        }
1527
    }
1528

    
1529
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1530
        err = kvm_update_guest_debug(env, 0);
1531
        if (err) {
1532
            return err;
1533
        }
1534
    }
1535
    return 0;
1536
}
1537

    
1538
int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
1539
                          target_ulong len, int type)
1540
{
1541
    struct kvm_sw_breakpoint *bp;
1542
    CPUArchState *env;
1543
    int err;
1544

    
1545
    if (type == GDB_BREAKPOINT_SW) {
1546
        bp = kvm_find_sw_breakpoint(current_env, addr);
1547
        if (!bp) {
1548
            return -ENOENT;
1549
        }
1550

    
1551
        if (bp->use_count > 1) {
1552
            bp->use_count--;
1553
            return 0;
1554
        }
1555

    
1556
        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
1557
        if (err) {
1558
            return err;
1559
        }
1560

    
1561
        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
1562
        g_free(bp);
1563
    } else {
1564
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
1565
        if (err) {
1566
            return err;
1567
        }
1568
    }
1569

    
1570
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1571
        err = kvm_update_guest_debug(env, 0);
1572
        if (err) {
1573
            return err;
1574
        }
1575
    }
1576
    return 0;
1577
}
1578

    
1579
void kvm_remove_all_breakpoints(CPUArchState *current_env)
1580
{
1581
    struct kvm_sw_breakpoint *bp, *next;
1582
    KVMState *s = current_env->kvm_state;
1583
    CPUArchState *env;
1584

    
1585
    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
1586
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
1587
            /* Try harder to find a CPU that currently sees the breakpoint. */
1588
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
1589
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) {
1590
                    break;
1591
                }
1592
            }
1593
        }
1594
    }
1595
    kvm_arch_remove_all_hw_breakpoints();
1596

    
1597
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1598
        kvm_update_guest_debug(env, 0);
1599
    }
1600
}
1601

    
1602
#else /* !KVM_CAP_SET_GUEST_DEBUG */
1603

    
1604
int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
1605
{
1606
    return -EINVAL;
1607
}
1608

    
1609
int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
1610
                          target_ulong len, int type)
1611
{
1612
    return -EINVAL;
1613
}
1614

    
1615
int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
1616
                          target_ulong len, int type)
1617
{
1618
    return -EINVAL;
1619
}
1620

    
1621
void kvm_remove_all_breakpoints(CPUArchState *current_env)
1622
{
1623
}
1624
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
1625

    
1626
int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset)
1627
{
1628
    struct kvm_signal_mask *sigmask;
1629
    int r;
1630

    
1631
    if (!sigset) {
1632
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);
1633
    }
1634

    
1635
    sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));
1636

    
1637
    sigmask->len = 8;
1638
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
1639
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
1640
    g_free(sigmask);
1641

    
1642
    return r;
1643
}
1644

    
1645
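/* Bind (or unbind) an eventfd to a 4-byte MMIO address with data match via
 * KVM_IOEVENTFD. */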
int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign)
1646
{
1647
    int ret;
1648
    struct kvm_ioeventfd iofd;
1649

    
1650
    iofd.datamatch = val;
1651
    iofd.addr = addr;
1652
    iofd.len = 4;
1653
    iofd.flags = KVM_IOEVENTFD_FLAG_DATAMATCH;
1654
    iofd.fd = fd;
1655

    
1656
    if (!kvm_enabled()) {
1657
        return -ENOSYS;
1658
    }
1659

    
1660
    if (!assign) {
1661
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
1662
    }
1663

    
1664
    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);
1665

    
1666
    if (ret < 0) {
1667
        return -errno;
1668
    }
1669

    
1670
    return 0;
1671
}
1672

    
1673
int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
1674
{
1675
    struct kvm_ioeventfd kick = {
1676
        .datamatch = val,
1677
        .addr = addr,
1678
        .len = 2,
1679
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
1680
        .fd = fd,
1681
    };
1682
    int r;
1683
    if (!kvm_enabled()) {
1684
        return -ENOSYS;
1685
    }
1686
    if (!assign) {
1687
        kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
1688
    }
1689
    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
1690
    if (r < 0) {
1691
        return r;
1692
    }
1693
    return 0;
1694
}
1695

    
1696
int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
1697
{
1698
    return kvm_arch_on_sigbus_vcpu(env, code, addr);
1699
}
1700

    
1701
int kvm_on_sigbus(int code, void *addr)
1702
{
1703
    return kvm_arch_on_sigbus(code, addr);
1704
}