/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
    int broken_set_mem_region;
    int migration_log;
    int vcpu_events;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
};

static KVMState *kvm_state;

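/* Grab the first unused slot (memory_size == 0), skipping slots 8..11,
 * which KVM reserves for its own use. Aborts if every slot is taken. */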
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

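/* Propagate one qemu slot to the kernel via KVM_SET_USER_MEMORY_REGION.
 * Dirty-page logging is forced on for every slot while migration logging
 * is active. */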
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    kvm_arch_reset_vcpu(env);
    if (kvm_arch_put_registers(env)) {
        fprintf(stderr, "Fatal: kvm vcpu reset failed\n");
        abort();
    }
}

int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}


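/* Create the in-kernel VCPU for env, mmap the shared kvm_run area, and
 * push qemu's initial register state into the kernel. */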
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
        s->coalesced_mmio_ring = (void *) env->kvm_run +
                s->coalesced_mmio * PAGE_SIZE;
#endif

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
        ret = kvm_arch_put_registers(env);
    }
err:
    return ret;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

static int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

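/* Test bit nr in a little-endian bitmap, the layout KVM_GET_DIRTY_LOG
 * uses: bit 0 is the least significant bit of byte 0, so e.g.
 * test_le_bit(9, addr) checks addr[1] & 0x02. */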
static int test_le_bit(unsigned long nr, unsigned char *addr)
{
    return (addr[nr >> 3] >> (nr & 7)) & 1;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using
 * cpu_physical_memory_set_dirty(): every page flagged in the KVM dirty
 * log is marked dirty in qemu's bitmap as well.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned char *bitmap = (unsigned char *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;

            if (test_le_bit(nr, bitmap)) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

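/* Query a KVM capability. Returns 0 if the extension is unsupported (or
 * the query fails), otherwise the positive value reported by the kernel. */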
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

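/* Bring the kernel's slot table in sync with qemu's view of physical
 * memory for [start_addr, start_addr + size): drop every overlapping
 * slot, re-register its non-overlapping prefix/suffix parts, and finally
 * register the new region itself unless it is unassigned I/O memory. */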
static void kvm_set_phys_mem(target_phys_addr_t start_addr,
                             ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size,
                                  ram_addr_t phys_offset)
{
    kvm_set_phys_mem(start_addr, size, phys_offset);
}

static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
};

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0)
        goto err;

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly.  Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
    s->coalesced_mmio_ring = NULL;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

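/* Dispatch a KVM_EXIT_IO request: feed each of 'count' items of 'size'
 * bytes through qemu's port I/O handlers in the requested direction.
 * Always returns 1 so that kvm_cpu_exec re-enters the guest. */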
static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_vcpu_dirty) {
        kvm_arch_get_registers(env);
        env->kvm_vcpu_dirty = 1;
    }
}

int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            cpu_exit(env);
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

850
int kvm_ioctl(KVMState *s, int type, ...)
851
{
852
    int ret;
853
    void *arg;
854
    va_list ap;
855

    
856
    va_start(ap, type);
857
    arg = va_arg(ap, void *);
858
    va_end(ap);
859

    
860
    ret = ioctl(s->fd, type, arg);
861
    if (ret == -1)
862
        ret = -errno;
863

    
864
    return ret;
865
}
866

    
867
int kvm_vm_ioctl(KVMState *s, int type, ...)
868
{
869
    int ret;
870
    void *arg;
871
    va_list ap;
872

    
873
    va_start(ap, type);
874
    arg = va_arg(ap, void *);
875
    va_end(ap);
876

    
877
    ret = ioctl(s->vmfd, type, arg);
878
    if (ret == -1)
879
        ret = -errno;
880

    
881
    return ret;
882
}
883

    
884
int kvm_vcpu_ioctl(CPUState *env, int type, ...)
885
{
886
    int ret;
887
    void *arg;
888
    va_list ap;
889

    
890
    va_start(ap, type);
891
    arg = va_arg(ap, void *);
892
    va_end(ap);
893

    
894
    ret = ioctl(env->kvm_fd, type, arg);
895
    if (ret == -1)
896
        ret = -errno;
897

    
898
    return ret;
899
}
900

    
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
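/* Run func(data) in the context of the given VCPU. This simplified
 * version relies on the caller already executing on the right thread;
 * with the iothread enabled, calling it for a foreign VCPU is a bug. */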
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
#ifdef CONFIG_IOTHREAD
    if (env != cpu_single_env) {
        abort();
    }
#endif
    func(data);
}

struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    CPUState *env;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;
    CPUState *env = dbg_data->env;

    if (env->kvm_vcpu_dirty) {
        kvm_arch_put_registers(env);
        env->kvm_vcpu_dirty = 0;
    }
    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = 0;
    if (env->singlestep_enabled)
        data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &data.dbg);
    data.dbg.control |= reinject_trap;
    data.env = env;

    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset)
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);

    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    qemu_free(sigmask);

    return r;
}