/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

    
31
//#define DEBUG_KVM
32

    
33
#ifdef DEBUG_KVM
34
#define dprintf(fmt, ...) \
35
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
36
#else
37
#define dprintf(fmt, ...) \
38
    do { } while (0)
39
#endif
40

    
41
typedef struct KVMSlot
42
{
43
    target_phys_addr_t start_addr;
44
    ram_addr_t memory_size;
45
    ram_addr_t phys_offset;
46
    int slot;
47
    int flags;
48
} KVMSlot;
49

    
50
typedef struct kvm_dirty_log KVMDirtyLog;
51

    
52
int kvm_allowed = 0;
53

    
54
struct KVMState
55
{
56
    KVMSlot slots[32];
57
    int fd;
58
    int vmfd;
59
    int coalesced_mmio;
60
    int broken_set_mem_region;
61
    int migration_log;
62
#ifdef KVM_CAP_SET_GUEST_DEBUG
63
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
64
#endif
65
};
66

    
67
static KVMState *kvm_state;
68

    
69
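/*
 * Return the first slot with no memory assigned, skipping slots 8..11,
 * which this KVM version reserves for internal use; aborts if all
 * slots are taken.
 */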
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

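/*
 * Translate a KVMSlot into a struct kvm_userspace_memory_region and
 * hand it to the kernel; dirty logging is forced on while migration
 * logging is active.
 */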
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

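/*
 * Create the kernel vCPU, mmap the shared kvm_run area and run the
 * per-architecture vCPU setup.
 */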
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}

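/* Write QEMU's register state for every vCPU back into the kernel. */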
int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL)  {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                phys_addr + size - 1);
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, there is no need to issue an ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

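/*
 * Globally enable or disable dirty logging for migration; every slot
 * whose KVM_MEM_LOG_DIRTY_PAGES flag disagrees with the new setting is
 * re-registered with the kernel.
 */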
int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty():
 * every page the kernel reports as dirty gets marked dirty in qemu's bitmap.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        /* one bit per target page, rounded up to whole bytes */
        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
            unsigned word = nr / (sizeof(*bitmap) * 8);
            unsigned bit = nr % (sizeof(*bitmap) * 8);

            if ((bitmap[word] >> bit) & 1) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

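/*
 * Coalesced MMIO lets the kernel batch writes to registered zones into
 * a ring buffer instead of exiting to userspace on every access; the
 * ring is drained in kvm_run_coalesced_mmio() below.
 */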
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

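/*
 * Returns 0 if the extension is unsupported, otherwise the positive
 * value reported by KVM_CHECK_EXTENSION.
 */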
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

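/*
 * Open /dev/kvm, create the VM and probe the capabilities this code
 * depends on.  Called once at startup; fills in the global kvm_state.
 */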
int kvm_init(int smp_cpus)
{
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    TAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0)
        goto err;

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
                "Please upgrade to at least kvm-81.\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

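/*
 * Dispatch a KVM_EXIT_IO exit: run each of 'count' accesses of 'size'
 * bytes through QEMU's port I/O handlers, copying data to or from the
 * shared kvm_run buffer.
 */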
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(env, port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(env, port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(env, port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(env, port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(env, port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(env, port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

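/*
 * Drain the coalesced MMIO ring that the kernel appends to behind our
 * back and replay each entry as an ordinary memory write.
 */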
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        /* the ring page sits s->coalesced_mmio pages into the mapping */
        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

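/*
 * Main vCPU execution loop: enter the guest via KVM_RUN and service
 * exits until one of them (I/O window, debug, exit request, error)
 * requires returning to the caller.
 */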
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        kvm_arch_pre_run(env, run);

        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

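/*
 * Map QEMU's view of a physical memory range onto KVM slots: drop or
 * split any overlapping slots, then register the new range unless it
 * is unassigned I/O memory.
 */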
void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, not even
         * by unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

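/*
 * Thin vararg wrappers around ioctl(2) for the three KVM file
 * descriptor levels (system, VM, vCPU).  All of them return -errno on
 * failure, so callers can propagate the result directly, e.g.:
 *
 *     ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
 *     if (ret < 0)
 *         return ret;
 */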
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

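/*
 * Without a synchronous MMU (no MMU notifiers in the kernel), guest
 * RAM must not be duplicated into fork()ed children via copy-on-write,
 * so it is marked MADV_DONTFORK; if that is unavailable, we cannot run
 * safely and exit.
 */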
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    TAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

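/*
 * Push the current debug configuration (single-stepping, breakpoints,
 * an optional trap to reinject) to the kernel for one vCPU.
 */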
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (env->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            free(bp);
            return err;
        }

        TAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        TAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    TAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */