/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
    int broken_set_mem_region;
    int migration_log;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
};

static KVMState *kvm_state;

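/* Return the first unused slot, skipping slots 8..11, which are reserved as
 * KVM private memory slots. Running out of slots is fatal. */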
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

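/* Find the slot that covers exactly [start_addr, end_addr), or NULL if no
 * slot matches both boundaries. */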
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

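/* Propagate one slot to the kernel via KVM_SET_USER_MEMORY_REGION. While a
 * migration log is active, dirty page logging is forced on independently of
 * the slot's own flags. */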
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

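/* Create the in-kernel vcpu for env and mmap its kvm_run communication
 * area, through which KVM reports exit reasons and I/O data. */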
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        ret = mmap_size; /* propagate the error, not the vcpu fd */
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}

int kvm_put_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };

    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
}

int kvm_get_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    return 0;
}

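/* Write the register state of all CPUs back into the kernel; also used as
 * the reset handler registered in kvm_init(). */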
int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                phys_addr + size - 1);
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing effectively changed, there is no need to issue the ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

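/* Globally enable or disable dirty logging for migration. Every slot whose
 * effective logging state differs from 'enable' is re-registered, picking
 * up the new migration_log setting in kvm_set_user_memory_region(). */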
int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty(),
 * i.e. pages that KVM reports dirty get all of their qemu dirty bits set.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        /* one bit per target page, rounded up to a whole byte */
        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
            unsigned word = nr / (sizeof(*bitmap) * 8);
            unsigned bit = nr % (sizeof(*bitmap) * 8);

            if ((bitmap[word] >> bit) & 1) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

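/* Register (or below, unregister) a coalesced MMIO zone. Writes to such a
 * zone are buffered in a ring shared with the kernel instead of triggering
 * one exit per access; the ring is drained by kvm_run_coalesced_mmio(). */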
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

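/* Query a KVM capability. Negative results (e.g. from very old kernels) are
 * collapsed to 0, so callers can treat the return value as a boolean or as
 * the extension-specific positive value. */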
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

static void kvm_reset_vcpus(void *opaque)
{
    kvm_sync_vcpus();
}

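/* One-time KVM setup: open /dev/kvm, check the API version, create the VM
 * and verify the capabilities this code relies on (userspace memory slots,
 * working slot destruction). SMP guests are not supported here. */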
int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    TAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        ret = s->vmfd; /* return the error, not the stale API version */
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly.  Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    qemu_register_reset(kvm_reset_vcpus, INT_MAX, NULL);

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

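/* Replay a KVM_EXIT_IO exit through QEMU's port I/O handlers: 'count'
 * accesses of 'size' bytes each are read from or stored into the data block
 * inside the kvm_run area. Returns 1 so kvm_cpu_exec() re-enters the guest. */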
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(env, port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(env, port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(env, port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(env, port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(env, port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(env, port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

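/* Flush MMIO writes that the kernel queued in the coalesced MMIO ring.
 * s->coalesced_mmio is the ring's page offset inside the kvm_run mapping,
 * as returned by the KVM_CAP_COALESCED_MMIO capability check. */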
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

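/* Per-vcpu execution loop: enter the guest with KVM_RUN and dispatch on the
 * exit reason. A handler result > 0 re-enters the guest; 0 returns to the
 * main loop, e.g. for an I/O window exit, a debug stop or an exit request. */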
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        kvm_arch_pre_run(env, run);
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

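/* Bring the kernel's slot table in sync with a QEMU physical memory range:
 * all overlapping slots are dropped, surviving prefix/suffix fragments are
 * re-registered, and the new range itself is registered last, unless it is
 * unassigned I/O memory that KVM does not need to know about. */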
void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, not even by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

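/* Thin vararg wrappers around ioctl(2) for the three KVM file descriptors:
 * the /dev/kvm fd (kvm_ioctl), the VM fd (kvm_vm_ioctl) and a per-vcpu fd
 * (kvm_vcpu_ioctl). All of them translate ioctl's -1/errno convention into
 * a negative errno return value, e.g.:
 *
 *     ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
 */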
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

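/* Without a synchronous MMU, KVM keeps long-lived references to guest RAM
 * pages, so guest memory must not be duplicated into forked children; mark
 * it MADV_DONTFORK and give up if that is unavailable. */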
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

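/* Guest debugging support for the gdbstub. Software breakpoints are kept in
 * a per-VM list (kvm_sw_breakpoints) and installed into the guest by the
 * architecture hooks; hardware breakpoints are handled entirely by the
 * kvm_arch_*_hw_breakpoint functions. */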
#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    TAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

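/* Recompute and load the guest debug state of one vcpu: single-stepping
 * from env->singlestep_enabled, architecture-specific breakpoint state via
 * kvm_arch_update_guest_debug(), plus any reinject_trap bits requested by
 * the caller. */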
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (env->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
}

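/* Insert a gdb breakpoint. Software breakpoints are reference-counted and
 * shared across the vcpus of the VM; hardware breakpoints are delegated to
 * the architecture code. In both cases the debug state of every vcpu is
 * refreshed afterwards. The removal path below mirrors this logic. */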
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        TAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        TAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    TAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */