/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
    int broken_set_mem_region;
    int migration_log;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
};
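
/*
 * The slots[] table is a user-space shadow of the kernel's memory slot
 * array: kvm_set_user_memory_region() below copies a KVMSlot field for
 * field into a struct kvm_userspace_memory_region before handing it to
 * the KVM_SET_USER_MEMORY_REGION ioctl.
 */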

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}
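
/*
 * Registration and deletion go through the same ioctl: a region with
 * memory_size == 0 asks the kernel to drop the slot, which is how
 * kvm_set_phys_mem() below unregisters overlapping slots before carving
 * out replacements.
 */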

int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}
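
/*
 * Each vcpu is a file descriptor of its own (the return value of
 * KVM_CREATE_VCPU) with a shared kvm_run area mmap'ed on top: the kernel
 * publishes the exit reason and exit data there, so no extra ioctl is
 * needed to inspect an exit after KVM_RUN returns.  KVM_GET_VCPU_MMAP_SIZE
 * is issued on the /dev/kvm fd because the mapping size is a property of
 * the module, not of an individual vcpu.
 */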

int kvm_put_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };

    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
}

int kvm_get_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    return 0;
}

int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                phys_addr + size - 1);
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing effectively changed, no need to issue an ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}
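
/*
 * The flags/mask pair is a read-modify-write: mask selects which flag bits
 * are affected, flags supplies their new values.  kvm_log_start() thus
 * sets KVM_MEM_LOG_DIRTY_PAGES and kvm_log_stop() clears it, leaving all
 * other flag bits of the slot untouched.
 */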

int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 *
 * This function updates qemu's dirty bitmap using
 * cpu_physical_memory_set_dirty(), which sets all dirty flags of a page at
 * once.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
            unsigned word = nr / (sizeof(*bitmap) * 8);
            unsigned bit = nr % (sizeof(*bitmap) * 8);

            if ((bitmap[word] >> bit) & 1) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}
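
/*
 * Sizing sketch (illustrative numbers, not from the original code): with
 * 4 KiB target pages, a 128 MiB slot spans 32768 pages, so the kernel
 * needs a 32768 / 8 = 4096 byte bitmap, which is exactly what
 * ((memory_size >> TARGET_PAGE_BITS) + 7) / 8 computes above.
 */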

int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}
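
/*
 * KVM_CHECK_EXTENSION returns 0 if a capability is absent and a positive,
 * sometimes capability-specific value if present (e.g. a page offset for
 * KVM_CAP_COALESCED_MMIO).  Errors are folded into 0 here so callers can
 * treat the result as a boolean or as that magnitude.
 */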

static void kvm_reset_vcpus(void *opaque)
{
    kvm_sync_vcpus();
}

int kvm_init(int smp_cpus)
{
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    TAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0)
        goto err;

    /* Initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer, so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel without it. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
                "Please upgrade to at least kvm-81.\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    qemu_register_reset(kvm_reset_vcpus, INT_MAX, NULL);

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}
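
/*
 * Note on the version check above: KVM_GET_API_VERSION must return exactly
 * KVM_API_VERSION (12 ever since the KVM API was declared stable); both
 * branches reject a mismatch, so older and newer kernel APIs alike are
 * refused.
 */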

static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(env, port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(env, port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(env, port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(env, port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(env, port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(env, port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}
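
/*
 * count > 1 corresponds to string I/O (e.g. x86 rep ins/outs): the data
 * for all iterations lies back to back in the kvm_run buffer, which is
 * why ptr simply advances by size on each pass.
 */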

static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}
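
/*
 * s->coalesced_mmio holds the value KVM_CHECK_EXTENSION reported for
 * KVM_CAP_COALESCED_MMIO, i.e. the offset of the ring in pages from the
 * start of the kvm_run mapping; hence the pointer arithmetic above.  The
 * ring is drained on every exit (see kvm_cpu_exec) so deferred writes are
 * replayed in order before the current exit reason is handled.
 */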

int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        kvm_arch_pre_run(env, run);

        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}
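
/*
 * Loop contract: a handler result > 0 means the exit was fully serviced
 * and the vcpu should re-enter the guest; 0 returns control to the caller
 * (e.g. for a pending exit_request); a fatal KVM_RUN error aborts.
 */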

void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, not even
         * by unregistering the previous ones and then registering the
         * larger slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not, or if some overlapping
         * slot comes around later, we will fail (not seen in practice so
         * far) and actually require a recent KVM version after all. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}
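
/*
 * Worked example (illustrative addresses, not from the original code):
 * assume an existing slot covers [0x0, 0x10000) and a new RAM region is
 * registered for [0x4000, 0x8000).  The old slot is unregistered, a prefix
 * slot [0x0, 0x4000) and a suffix slot [0x8000, 0x10000) are re-registered
 * with old.phys_offset shifted accordingly, and the code after the loop
 * finally registers the new [0x4000, 0x8000) slot itself.
 */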

int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}
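
/*
 * The three wrappers differ only in the fd they target: /dev/kvm itself,
 * the VM fd, or a vcpu fd.  Usage sketch (KVM_GET_REGS is a standard KVM
 * vcpu ioctl on x86; error handling elided):
 *
 *     struct kvm_regs regs;
 *     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 */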

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}
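
/*
 * Rationale (background, not stated in the original code): without a
 * synchronous MMU (KVM_CAP_SYNC_MMU), the kernel holds direct references
 * to guest pages, so copy-on-write after a fork() could leave the guest
 * running on stale copies.  MADV_DONTFORK keeps guest RAM out of child
 * address spaces entirely.
 */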

#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    TAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (env->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
}
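
/*
 * dbg.control is a bitmask; KVM_GUESTDBG_ENABLE must be set for any other
 * debug bit to take effect.  kvm_arch_update_guest_debug() adds the
 * architecture's breakpoint bits before reinject_trap is OR'ed in.  Every
 * vcpu must be refreshed whenever the breakpoint list changes, which is
 * what the loops in the insert/remove functions below do.
 */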

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        TAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        TAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    TAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */