/* kvm-all.c @ revision a2eebe88 */

/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
    int broken_set_mem_region;
    int migration_log;
    int vcpu_events;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}
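
/* A slot with memory_size == 0 is treated as free. Slots 8-11 are never
 * handed out; they were reserved as "private" slots for internal mappings
 * (an assumption based on the qemu-kvm lineage of this code), so allocating
 * them here could clash with those users. */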

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}
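
/* The overlap test above is the standard check for two half-open ranges,
 * [start_addr, end_addr) and [mem->start_addr, mem->start_addr +
 * mem->memory_size): they intersect iff each one starts before the other
 * ends. The first condition in the loop skips empty slots and keeps only
 * the candidate with the lowest start address. */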

static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}
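
/* KVM_SET_USER_MEMORY_REGION both creates and updates slots; passing a
 * memory_size of 0 deletes the slot in the kernel, which is how
 * kvm_set_phys_mem() below unregisters overlapping regions. */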

static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    kvm_arch_reset_vcpu(env);
    if (kvm_arch_put_registers(env)) {
        fprintf(stderr, "Fatal: kvm vcpu reset failed\n");
        abort();
    }
}

int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}

int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
        s->coalesced_mmio_ring = (void *) env->kvm_run +
                s->coalesced_mmio * PAGE_SIZE;
#endif

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
        ret = kvm_arch_put_registers(env);
    }
err:
    return ret;
}
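
/* The vcpu file descriptor's mmap region holds the shared struct kvm_run.
 * When KVM_CAP_COALESCED_MMIO is available, the extension check returns
 * the page offset of the coalesced-MMIO ring within that same mapping,
 * which is what s->coalesced_mmio * PAGE_SIZE computes above. */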

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing effectively changed, no need to issue an ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
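
/* The flags/mask pair lets callers update only the bits they own:
 * kvm_log_start() sets KVM_MEM_LOG_DIRTY_PAGES within that mask, while
 * kvm_log_stop() passes flags of 0 with the same mask to clear it,
 * leaving any other slot flags untouched. */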

static int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

static int test_le_bit(unsigned long nr, unsigned char *addr)
{
    return (addr[nr >> 3] >> (nr & 7)) & 1;
}
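
/* test_le_bit() reads bit 'nr' from a little-endian bitmap: bit nr lives
 * in byte nr / 8, at bit position nr % 8 within that byte. For example,
 * test_le_bit(9, addr) tests bit 1 of addr[1]. */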

/**
 * kvm_physical_sync_dirty_bitmap - Grab the dirty bitmap from kernel space
 *
 * This function updates qemu's dirty bitmap using
 * cpu_physical_memory_set_dirty(): every page the kernel reports as dirty
 * is marked dirty in qemu's bitmap as well.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned char *bitmap = (unsigned char *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;

            if (test_le_bit(nr, bitmap)) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}
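
/* One bit per target page: ((memory_size >> TARGET_PAGE_BITS) + 7) / 8
 * rounds the per-slot page count up to whole bytes. The outer loop walks
 * slot by slot, advancing start_addr past each synced slot, and stops
 * once no slot overlaps the remaining range. */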

int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}
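
/* kvm_check_extension() never returns a negative value: kernels that do
 * not know an extension report it as absent. The result is the raw
 * KVM_CHECK_EXTENSION value, which is 0/1 for most capabilities but can
 * carry extra information (e.g. a page offset for coalesced MMIO). */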

static void kvm_set_phys_mem(target_phys_addr_t start_addr,
                             ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, not even
         * by unregistering the previous ones and then registering the
         * larger slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not, or if some overlapping
         * slot comes around later, we will fail (not seen in practice so
         * far) - and then actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}
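
/* Since KVM slots cannot be resized or merged in place, the loop above
 * deletes every slot that overlaps the new region and re-registers the
 * non-overlapping remainders as separate prefix/suffix slots before the
 * new region itself is finally registered. */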

static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size,
                                  ram_addr_t phys_offset)
{
    kvm_set_phys_mem(start_addr, size, phys_offset);
}

static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
};

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0)
        goto err;

    /* Initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user-allocated buffer, so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel without it. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
    s->coalesced_mmio_ring = NULL;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}
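
/* A KVM_EXIT_IO exit describes a batch of port accesses (count elements
 * of 'size' bytes each, e.g. from x86 string I/O like rep insb), with the
 * data buffer living inside the shared kvm_run mapping at
 * run->io.data_offset. Returning 1 tells the caller to re-enter the
 * guest. */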

void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}
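
/* The coalesced-MMIO ring is a single-producer/single-consumer buffer:
 * the kernel appends deferred writes at ring->last, and this consumer
 * replays entries from ring->first until the ring is empty. The FIXME
 * notes a missing write barrier before publishing the new 'first' index. */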

void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_vcpu_dirty) {
        kvm_arch_get_registers(env);
        env->kvm_vcpu_dirty = 1;
    }
}

int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}
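
/* These three wrappers mirror the KVM API's file-descriptor hierarchy:
 * system-wide ioctls go to the /dev/kvm fd, VM-scoped ioctls to the fd
 * returned by KVM_CREATE_VM, and per-vcpu ioctls to the fd returned by
 * KVM_CREATE_VCPU. All of them fold ioctl()'s -1/errno convention into a
 * negative errno return value. */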

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
#ifdef CONFIG_IOTHREAD
    if (env != cpu_single_env) {
        abort();
    }
#endif
    func(data);
}
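
/* on_vcpu() documents the requirement that guest-debug ioctls run in the
 * thread owning the vcpu: in this version it simply asserts (under
 * CONFIG_IOTHREAD) that the caller already is that thread and invokes the
 * callback directly. */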

struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    CPUState *env;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;
    CPUState *env = dbg_data->env;

    if (env->kvm_vcpu_dirty) {
        kvm_arch_put_registers(env);
        env->kvm_vcpu_dirty = 0;
    }
    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = 0;
    if (env->singlestep_enabled)
        data.dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &data.dbg);
    data.dbg.control |= reinject_trap;
    data.env = env;

    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */