kvm-all.c @ 4495d6a7

/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;
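
/* One KVMSlot per kvm_userspace_memory_region: start_addr is the
 * guest-physical base, phys_offset the qemu ram_addr_t from which the host
 * virtual address is derived, and slot the index handed to the kernel. */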

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
    int broken_set_mem_region;
    int migration_log;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}
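
/* Push one slot to the kernel. Note that callers delete a slot by setting
 * its memory_size to 0 before calling this: KVM drops a region whose size
 * is zero. */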
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}

int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                phys_addr + size - 1);
        return -EINVAL;
    }

    old_flags = mem->flags;
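
    /* 'mask' selects which flag bits may change; bits outside it keep
     * their previous value. */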
    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty():
 * every page the kernel reports as dirty is marked dirty in qemu, too.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                    target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    KVMDirtyLog d;
    KVMSlot *mem = kvm_lookup_matching_slot(s, start_addr, end_addr);
    unsigned long alloc_size;
    ram_addr_t addr;
    target_phys_addr_t phys_addr = start_addr;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr, end_addr - 1);
        return;
    }
    dprintf("sync addr: " TARGET_FMT_plx " into %lx\n", start_addr,
            mem->phys_offset);

    /* The kernel log holds one bit per target page and is copied out in
     * long-sized chunks, so round the buffer up to whole longs. */
    alloc_size = ((mem->memory_size >> TARGET_PAGE_BITS) + HOST_LONG_BITS - 1) /
                 HOST_LONG_BITS * sizeof(unsigned long);
    d.dirty_bitmap = qemu_mallocz(alloc_size);

    d.slot = mem->slot;
    dprintf("slot %d, phys_addr %llx, uaddr: %llx\n",
            d.slot, mem->start_addr, mem->phys_offset);

    if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
        dprintf("ioctl failed %d\n", errno);
        goto out;
    }

    phys_addr = start_addr;
    for (addr = mem->phys_offset; phys_addr < end_addr;
         phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
        unsigned nr = (phys_addr - start_addr) >> TARGET_PAGE_BITS;
        unsigned word = nr / (sizeof(*bitmap) * 8);
        unsigned bit = nr % (sizeof(*bitmap) * 8);

        if ((bitmap[word] >> bit) & 1)
            cpu_physical_memory_set_dirty(addr);
    }
out:
    qemu_free(d.dirty_bitmap);
}

int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}
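
/* Returns the value reported by KVM_CHECK_EXTENSION, or 0 if the ioctl
 * fails. For most capabilities this is a boolean, but some, such as
 * KVM_CAP_COALESCED_MMIO, return an additional parameter (there: the page
 * offset of the MMIO ring within the kvm_run mapping). */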
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

int kvm_init(int smp_cpus)
{
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    TAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        /* propagate the error, not the (positive) API version left in ret */
        ret = s->vmfd;
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly.  Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
                "Please upgrade to at least kvm-81.\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}
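
/* Complete a port I/O exit in userspace: 'data' points into the shared
 * kvm_run mapping (run + run->io.data_offset) and holds 'count' items of
 * 'size' bytes each. */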
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(env, port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(env, port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(env, port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(env, port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(env, port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(env, port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}
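
/* Drain the coalesced MMIO ring: instead of exiting on every write to a
 * registered zone, the kernel queues the writes in a ring located in the
 * pages mapped behind kvm_run, at the page offset reported by
 * KVM_CAP_COALESCED_MMIO. */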
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        kvm_arch_pre_run(env, run);

        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);
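
        /* Exit handlers set ret > 0 to re-enter the guest and 0 to drop
         * back into the qemu main loop. */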
        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}
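
/* Register, split, or remove a guest-physical memory range with KVM. Slots
 * cannot be resized in place, so each overlapping slot is unregistered (by
 * setting its size to 0) and its non-overlapping prefix and suffix are
 * re-registered before the new range itself is added. */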
void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}
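
/* Thin vararg wrappers around ioctl(): KVM ioctls take at most one
 * argument, extracted here as a void pointer. The three variants address
 * the /dev/kvm, VM, and vcpu file descriptors respectively. */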
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}
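
/* Without KVM_CAP_SYNC_MMU, the kernel's view of guest RAM does not track
 * changes to the userspace mapping, so a copy-on-write fault after fork()
 * could desynchronize the two. Guest memory is therefore marked
 * MADV_DONTFORK so it never appears in child processes. */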
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    TAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}
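
/* Push the current debug state to one vcpu. reinject_trap is ORed into
 * dbg.control, which lets callers ask the architecture-specific handler to
 * reinject a pending debug trap into the guest. */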
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (env->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        TAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        TAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    TAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */