root / kvm-all.c @ bd836776

/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
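
/* Debug output is compiled out by default: uncommenting the DEBUG_KVM define
 * above turns every dprintf() in this file into an fprintf(stderr) call. */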

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
    int broken_set_mem_region;
    int migration_log;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}
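
/* Note: handing the kernel a region with memory_size == 0 through the ioctl
 * above deletes the corresponding slot; kvm_set_phys_mem() below relies on
 * this to unregister overlapping slots before re-registering them. */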

static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    if (kvm_arch_put_registers(env)) {
        fprintf(stderr, "Fatal: kvm vcpu reset failed\n");
        abort();
    }
}

int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        ret = kvm_arch_put_registers(env);
    }
err:
    return ret;
}
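
/* Typical call order, as a rough sketch (not code from this file): the
 * machine init path calls kvm_init(smp_cpus) once, kvm_init_vcpu(env) is
 * then run for each CPU to create and mmap its kernel vcpu state, and the
 * execution loop finally cycles through kvm_cpu_exec(env). */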

int kvm_put_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };

    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
}

int kvm_get_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}
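
/* The global migration_log flag and the per-slot KVM_MEM_LOG_DIRTY_PAGES
 * flag are OR'ed together in kvm_set_user_memory_region(), so a slot keeps
 * logging dirty pages as long as either migration is active or a client
 * enabled logging via kvm_log_start(). */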

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty():
 * every page that KVM reports as dirty is marked dirty in qemu as well.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;
    int r;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        /* We didn't activate dirty logging? Don't care then, but do skip
         * past the slot - leaving start_addr untouched here would retry the
         * same slot forever. */
        if (!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) {
            start_addr = mem->start_addr + mem->memory_size;
            continue;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        r = kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d);
        if (r == -EINVAL) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
            unsigned word = nr / (sizeof(*bitmap) * 8);
            unsigned bit = nr % (sizeof(*bitmap) * 8);

            if ((bitmap[word] >> bit) & 1) {
                cpu_physical_memory_set_dirty(addr);
            } else if (r < 0) {
                /* When our KVM implementation doesn't know about dirty logging
                 * we can just assume it's always dirty and be fine. */
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}
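
/* Bitmap layout assumed above: one bit per target page, LSB-first within
 * each unsigned long, i.e. for page number nr
 *
 *     word  = nr / (sizeof(long) * 8)
 *     bit   = nr % (sizeof(long) * 8)
 *     dirty = (bitmap[word] >> bit) & 1
 */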

int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}
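
/* kvm_check_extension() folds ioctl errors into 0, so callers may treat the
 * result as a boolean (or as the capability's value), as kvm_init() does:
 *
 *     if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) { ... bail out ... }
 */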

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    TAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        /* propagate the error; ret still holds the positive API version */
        ret = s->vmfd;
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly.  Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(env, port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(env, port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(env, port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(env, port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(env, port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(env, port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        kvm_arch_pre_run(env, run);
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}
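
/* kvm_set_phys_mem() below must cope with regions that only partially
 * overlap an existing slot. A sketch of the splitting it performs:
 *
 *     old slot:      [---------------------------]
 *     new region:            [:::::::::]
 *     registered as: [prefix][:::::::::][-suffix-]
 *
 * The overlapping slot is unregistered first (memory_size = 0), then prefix
 * and suffix slots are re-registered around the new region. */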

void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}
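
/* All three wrappers share one error convention: a failing ioctl()'s -1 is
 * replaced by -errno, so callers test for ret < 0 and can hand the value
 * straight to strerror(-ret), as kvm_cpu_exec() does. */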

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    TAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (env->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            /* allocated with qemu_malloc(), so release with qemu_free() */
            qemu_free(bp);
            return err;
        }

        TAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        TAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    TAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */