/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-barrier.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

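/* Global KVM connection state.  Each entry in 'slots' mirrors one
 * kvm_userspace_memory_region registered with the kernel; a slot whose
 * memory_size is 0 is considered free. */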
struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
    int broken_set_mem_region;
    int migration_log;
    int vcpu_events;
    int robust_singlestep;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* Slots 8..11 are reserved as KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

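/* Push one slot's parameters to the kernel via KVM_SET_USER_MEMORY_REGION.
 * The kernel identifies the region by mem.slot; registering a slot with
 * memory_size == 0 deletes it. */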
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    kvm_arch_reset_vcpu(env);
    if (kvm_arch_put_registers(env)) {
        fprintf(stderr, "Fatal: kvm vcpu reset failed\n");
        abort();
    }
}

int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}

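/* Create a vcpu in the kernel and mmap its shared kvm_run control block.
 * The mapping size comes from KVM_GET_VCPU_MMAP_SIZE and can span several
 * pages when the coalesced-MMIO ring is appended behind struct kvm_run. */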
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
        s->coalesced_mmio_ring = (void *) env->kvm_run +
                s->coalesced_mmio * PAGE_SIZE;
#endif

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
        ret = kvm_arch_put_registers(env);
    }
err:
    return ret;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue an ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

static int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

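/* Test bit 'nr' in a byte-wise little-endian bitmap; this matches the
 * layout of the dirty bitmap returned by KVM_GET_DIRTY_LOG, where bit 0
 * of byte 0 stands for the first page of the slot. */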
static int test_le_bit(unsigned long nr, unsigned char *addr)
{
    return (addr[nr >> 3] >> (nr & 7)) & 1;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab the dirty bitmap from kernel space
 *
 * For every page the kernel reports as dirty, the corresponding bit in
 * qemu's dirty bitmap is set via cpu_physical_memory_set_dirty().
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned char *bitmap = (unsigned char *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;

            if (test_le_bit(nr, bitmap)) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

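/* Coalesced MMIO: guest writes to a registered zone are queued by the
 * kernel in a ring buffer instead of causing an exit to userspace per
 * write; kvm_flush_coalesced_mmio_buffer() replays them later. */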
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

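/* (Un)register guest physical memory with the kernel.  KVM slots must not
 * overlap, so any existing slot intersecting the new range is unregistered
 * first and its remainder re-registered as prefix/suffix slots. */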
static void kvm_set_phys_mem(target_phys_addr_t start_addr,
                             ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, not even
         * by unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not, or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size,
                                  ram_addr_t phys_offset)
{
    kvm_set_phys_mem(start_addr, size, phys_offset);
}

static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
};

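/* Open /dev/kvm, create the VM and probe the capabilities this code relies
 * on.  Returns 0 on success or a negative errno value; on failure any file
 * descriptors acquired so far are closed again. */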
int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0)
        goto err;

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel without it. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
    s->coalesced_mmio_ring = NULL;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    s->robust_singlestep = 0;
#ifdef KVM_CAP_X86_ROBUST_SINGLESTEP
    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

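/* Emulate a (possibly string) PIO request from KVM_EXIT_IO: the data lives
 * in the kvm_run page at run->io.data_offset, packed as 'count' items of
 * 'size' bytes each. */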
static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

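/* Drain the coalesced-MMIO ring shared with the kernel.  Userspace only
 * advances 'first' (the kernel produces at 'last'); the smp_wmb() makes
 * sure an entry is fully consumed before its slot is handed back to the
 * producer. */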
void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            smp_wmb();
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_vcpu_dirty) {
        kvm_arch_get_registers(env);
        env->kvm_vcpu_dirty = 1;
    }
}

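/* Outer vcpu run loop: a handler return value > 0 re-enters the guest,
 * 0 drops back to the main loop.  The iothread lock is released around
 * KVM_RUN so other threads can make progress while the guest executes. */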
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
#ifndef CONFIG_IOTHREAD
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }
#endif

        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            cpu_exit(env);
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

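/* Thin vararg wrappers around ioctl() on the /dev/kvm, VM and vcpu file
 * descriptors.  Each takes at most one pointer-sized argument and converts
 * a -1 return into a negative errno value. */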
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

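/* Without a synchronous MMU (KVM_CAP_SYNC_MMU), KVM holds its own
 * references to guest pages, so copy-on-write after a fork() could leave
 * the kernel and the parent process looking at different pages.
 * MADV_DONTFORK keeps guest memory out of child processes entirely. */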
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
#ifdef CONFIG_IOTHREAD
    if (env != cpu_single_env) {
        abort();
    }
#endif
    func(data);
}

struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    CPUState *env;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;
    CPUState *env = dbg_data->env;

    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = reinject_trap;

    if (env->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(env, &data.dbg);
    data.env = env;

    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

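/* Software breakpoints are reference-counted and kept on a per-VM list;
 * after every insertion or removal, the debug state of all vcpus is
 * refreshed so the kernel traps the affected addresses. */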
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */

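/* Install the signal mask that KVM applies atomically while the vcpu is
 * running.  sigmask->len is the size of the sigset as the kernel sees it
 * (8 bytes, i.e. 64 signals), not the larger userspace sizeof(sigset_t). */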
int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset)
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);

    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    qemu_free(sigmask);

    return r;
}