kvm-all.c @ 6312b928

/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

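/*
 * Editor's illustration (not in the original file): a slot ties a range of
 * guest-physical addresses to an offset into qemu's RAM.  A hypothetical
 * slot with
 *
 *     start_addr  = 0x100000   (guest-physical base)
 *     memory_size = 0x200000   (2 MiB)
 *     phys_offset = 0x100000   (offset into qemu RAM, see qemu_get_ram_ptr())
 *
 * covers guest pages [0x100000, 0x300000) and is pushed to the kernel via
 * KVM_SET_USER_MEMORY_REGION in kvm_set_user_memory_region() below.
 */
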
typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
    int broken_set_mem_region;
    int migration_log;
    int vcpu_events;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

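/*
 * Worked example (editor's illustration): with slots [0x0, 0x8000) and
 * [0x8000, 0x10000) registered, a query for [0x7000, 0x9000) matches both
 * via the half-open overlap test (end_addr > slot start && start_addr <
 * slot end); the loop keeps the match with the lower start address, so
 * [0x0, 0x8000) is returned.
 */
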
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

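/*
 * Note (editor's addition): KVM_SET_USER_MEMORY_REGION with memory_size == 0
 * deletes the slot from the kernel.  kvm_set_phys_mem() below relies on this
 * to unregister a slot:
 *
 *     mem->memory_size = 0;
 *     kvm_set_user_memory_region(s, mem);
 */
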
static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    kvm_arch_reset_vcpu(env);
    if (kvm_arch_put_registers(env)) {
        fprintf(stderr, "Fatal: kvm vcpu reset failed\n");
        abort();
    }
}

int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}

int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
        s->coalesced_mmio_ring = (void *) env->kvm_run +
                s->coalesced_mmio * PAGE_SIZE;
#endif

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
        ret = kvm_arch_put_registers(env);
    }
err:
    return ret;
}

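/*
 * Note (editor's addition): with KVM_CAP_COALESCED_MMIO, the value stored in
 * s->coalesced_mmio by kvm_check_extension() in kvm_init() is the page
 * offset of the coalesced-MMIO ring within the vcpu mmap area, hence the
 * "s->coalesced_mmio * PAGE_SIZE" arithmetic above.
 */
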
/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

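/*
 * Worked example (editor's illustration): with mask and flags both set to
 * KVM_MEM_LOG_DIRTY_PAGES, as in kvm_log_start() below,
 * "(mem->flags & ~mask) | flags" turns dirty logging on while leaving all
 * other flag bits untouched; kvm_log_stop() passes flags == 0 with the same
 * mask to clear just that bit.
 */
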
int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

static int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

static int test_le_bit(unsigned long nr, unsigned char *addr)
{
    return (addr[nr >> 3] >> (nr & 7)) & 1;
}

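/*
 * Worked example (editor's illustration): test_le_bit(10, bitmap) reads
 * bitmap[10 >> 3] == bitmap[1] and extracts bit (10 & 7) == 2, i.e. the
 * third-least-significant bit of the second byte -- the little-endian bit
 * layout KVM uses for its dirty bitmaps.
 */
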
/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 *
 * For every page the kernel reports as dirty, this function calls
 * cpu_physical_memory_set_dirty(), which sets all of qemu's per-page
 * dirty-tracking bits for that page.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) < 0) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned char *bitmap = (unsigned char *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;

            if (test_le_bit(nr, bitmap)) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

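/*
 * Worked example (editor's illustration): the dirty bitmap holds one bit
 * per target page, rounded up to whole bytes.  For a 1 MiB slot with 4 KiB
 * pages, memory_size >> TARGET_PAGE_BITS == 256 pages, so
 * (256 + 7) / 8 == 32 bytes of bitmap are allocated.
 */
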
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

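/*
 * Note (editor's addition): KVM_CHECK_EXTENSION returns 0 for an absent
 * capability and a capability-specific positive value otherwise, so the
 * result can carry more than a boolean -- kvm_init() below stores the
 * KVM_CAP_COALESCED_MMIO result, which is the ring's page offset.
 */
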
static void kvm_set_phys_mem(target_phys_addr_t start_addr,
                             ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

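/*
 * Worked example (editor's illustration): suppose RAM slot [0x0, 0x10000)
 * is registered and a new region is mapped over [0x4000, 0x8000).  The loop
 * above unregisters the old slot, re-registers the prefix [0x0, 0x4000) and
 * the suffix [0x8000, 0x10000), and the tail of the function then registers
 * the new slot [0x4000, 0x8000) itself.
 */
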
static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size,
                                  ram_addr_t phys_offset)
{
    kvm_set_phys_mem(start_addr, size, phys_offset);
}

static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
};

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0)
        goto err;

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
    s->coalesced_mmio_ring = NULL;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

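/*
 * Note (editor's addition): count > 1 occurs for x86 string I/O
 * (rep ins/outs), where the kernel batches the repetitions into one buffer;
 * e.g. a "rep outsw" with count == 3 is handled above as three 2-byte
 * accesses, advancing ptr by size after each one.
 */
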
void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

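/*
 * Note (editor's addition): the ring is a single-producer/single-consumer
 * queue shared with the kernel -- KVM appends coalesced writes at
 * ring->last, userspace drains from ring->first, and the ring is empty when
 * the two indices are equal.  Both wrap modulo KVM_COALESCED_MMIO_MAX.
 */
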
void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_vcpu_dirty) {
        kvm_arch_get_registers(env);
        env->kvm_vcpu_dirty = 1;
    }
}

int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
#ifndef CONFIG_IOTHREAD
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }
#endif

        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            cpu_exit(env);
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

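/*
 * Note (editor's addition): in the loop above, ret > 0 means "exit was
 * handled, re-enter the guest", ret == 0 means "return to the caller's main
 * loop" (e.g. an I/O window or debug exit), and a negative return from
 * KVM_RUN is fatal except for -EINTR/-EAGAIN.
 */
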
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

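/*
 * Usage sketch (editor's addition): each wrapper takes at most one extra
 * argument, fetched as a void *, and maps ioctl()'s -1/errno convention to
 * a -errno return value.  Callers in this file pass either a pointer or a
 * plain integer, e.g.:
 *
 *     kvm_ioctl(s, KVM_GET_API_VERSION, 0);
 *     kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
 *     kvm_vcpu_ioctl(env, KVM_RUN, 0);
 */
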
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

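/*
 * Note (editor's addition, hedged): without KVM_CAP_SYNC_MMU the kernel does
 * not track changes to userspace mappings of guest RAM, so the copy-on-write
 * triggered by fork() could leave KVM referencing stale pages;
 * MADV_DONTFORK sidesteps this by excluding guest RAM from child processes
 * entirely.
 */
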
#ifdef KVM_CAP_SET_GUEST_DEBUG
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
#ifdef CONFIG_IOTHREAD
    if (env != cpu_single_env) {
        abort();
    }
#endif
    func(data);
}

struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    CPUState *env;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;
    CPUState *env = dbg_data->env;

    if (env->kvm_vcpu_dirty) {
        kvm_arch_put_registers(env);
        env->kvm_vcpu_dirty = 0;
    }
    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = 0;
    if (env->singlestep_enabled)
        data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &data.dbg);
    data.dbg.control |= reinject_trap;
    data.env = env;

    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

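/*
 * Usage sketch (editor's illustration): to single-step a vcpu, a debugger
 * front end sets env->singlestep_enabled and calls
 * kvm_update_guest_debug(env, 0); the function above then pushes
 * KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP to the kernel on the vcpu's
 * own thread via on_vcpu().
 */
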
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset)
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);

    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8; /* the kernel's sigset is 8 bytes, not sizeof(sigset_t) */
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    qemu_free(sigmask);

    return r;
}