/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
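
/* A KVMSlot mirrors one kvm_userspace_memory_region: a range of guest
 * physical memory (start_addr, memory_size) backed by QEMU RAM at
 * phys_offset, registered with the kernel under the given slot number
 * via KVM_SET_USER_MEMORY_REGION. */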
typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
};
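
/* KVM hands out three levels of file descriptors: the system-wide fd from
 * opening /dev/kvm (s->fd), one per-VM fd from KVM_CREATE_VM (s->vmfd), and
 * one fd per vCPU from KVM_CREATE_VCPU (env->kvm_fd).  The kvm_ioctl(),
 * kvm_vm_ioctl() and kvm_vcpu_ioctl() wrappers below issue ioctls against
 * each level respectively. */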
static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}
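
/* Illustration: given a slot covering [0x100000, 0x200000), a query with
 * start_addr = 0x1f0000 and end_addr = 0x210000 matches it, since end_addr
 * lies past the slot's start and start_addr lies before the slot's end.
 * When several slots overlap the range, the one with the lowest start
 * address is returned. */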
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;

    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        ret = mmap_size;
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}

int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, unsigned flags,
                                      unsigned mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                phys_addr + size - 1);
        return -EINVAL;
    }

    flags = (mem->flags & ~mask) | flags;
    /* Nothing changed, no need to issue ioctl */
    if (flags == mem->flags)
        return 0;

    mem->flags = flags;

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty(),
 * i.e. every page that KVM reports as dirty is marked dirty in qemu's bitmap.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                    target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    KVMDirtyLog d;
    KVMSlot *mem = kvm_lookup_matching_slot(s, start_addr, end_addr);
    unsigned long alloc_size;
    ram_addr_t addr;
    target_phys_addr_t phys_addr = start_addr;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr, end_addr - 1);
        return;
    }
    dprintf("sync addr: " TARGET_FMT_plx " into %lx\n", start_addr,
            mem->phys_offset);

    /* One bit per page; the kernel copies the bitmap out in host-long-sized
     * chunks, so round the allocation up accordingly.  Note that ">>" binds
     * weaker than "/", hence the explicit parentheses. */
    alloc_size = ((mem->memory_size >> TARGET_PAGE_BITS) + sizeof(long) * 8 - 1)
                 / (sizeof(long) * 8) * sizeof(long);
    d.dirty_bitmap = qemu_mallocz(alloc_size);

    d.slot = mem->slot;
    dprintf("slot %d, phys_addr " TARGET_FMT_plx ", uaddr: %lx\n",
            d.slot, mem->start_addr, mem->phys_offset);

    if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
        dprintf("ioctl failed %d\n", errno);
        goto out;
    }

    phys_addr = start_addr;
    for (addr = mem->phys_offset; phys_addr < end_addr;
         phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
        unsigned nr = (phys_addr - start_addr) >> TARGET_PAGE_BITS;
        unsigned word = nr / (sizeof(*bitmap) * 8);
        unsigned bit = nr % (sizeof(*bitmap) * 8);

        if ((bitmap[word] >> bit) & 1)
            cpu_physical_memory_set_dirty(addr);
    }
out:
    qemu_free(d.dirty_bitmap);
}
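
/* Worked example of the bitmap indexing above: with 4 KiB target pages and a
 * 64-bit host, page number nr = 70 within the slot falls into word
 * 70 / 64 = 1 and bit 70 % 64 = 6, i.e. bit 6 of the second unsigned long in
 * the buffer that KVM_GET_DIRTY_LOG filled in. */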
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}
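
/* KVM_CHECK_EXTENSION returns 0 when a capability is absent and a positive
 * value when present; for some capabilities the value carries information,
 * e.g. KVM_CAP_COALESCED_MMIO reports the page offset of the coalesced-MMIO
 * ring inside the vcpu mmap area, which is why s->coalesced_mmio doubles as
 * both a flag and an offset.  Negative ioctl results are folded to 0 so
 * callers can treat the return value as a boolean. */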
int kvm_init(int smp_cpus)
{
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    TAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        ret = -errno;
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        ret = s->vmfd;
        goto err;
    }

    /* Initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user-allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel that lacks it. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
                "Please upgrade to at least kvm-81.\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}
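
/* The bring-up order in kvm_init() follows the KVM API contract: open
 * /dev/kvm, verify KVM_GET_API_VERSION, create the VM fd, probe the
 * capabilities this code depends on, and only then run the architecture
 * specific setup against the new VM. */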
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(env, port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(env, port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(env, port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(env, port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(env, port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(env, port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}
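
/* The kernel stages PIO data in the kvm_run mmap area: for a string
 * instruction such as "rep outsw" with count = 2 and size = 2, two 16-bit
 * values sit back to back starting at (uint8_t *)run + run->io.data_offset,
 * which is why the loop above advances ptr by size on each iteration. */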
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}
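
/* The coalesced-MMIO ring lives in the same mmap area as kvm_run, at the
 * page offset reported by KVM_CAP_COALESCED_MMIO.  The kernel produces
 * entries (advancing ring->last) as the guest writes to registered zones;
 * QEMU consumes them here (advancing ring->first), replaying the deferred
 * writes in order before the exit that flushed them is handled. */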
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        kvm_arch_pre_run(env, run);

        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}
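
/* Return-value convention inside the loop above: ret > 0 means the exit was
 * handled and the vCPU should re-enter the guest, ret == 0 means control
 * returns to the caller (exit request, I/O window, debug stop), and a
 * negative KVM_RUN result other than -EINTR/-EAGAIN aborts. */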
void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we cannot join slots, not even
         * by unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not, or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}
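
/* Example of the splitting above, with hypothetical addresses: if a slot
 * already covers [0x0, 0xa0000000) and a new region replaces
 * [0x80000000, 0x90000000), the old slot is unregistered, a prefix slot
 * [0x0, 0x80000000) and a suffix slot [0x90000000, 0xa0000000) are
 * re-registered with phys_offset shifted accordingly, and the code after
 * the loop then registers the new middle slot. */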
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}
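
/* Usage sketch (KVM_GET_REGS is x86-specific and shown only to illustrate
 * the calling convention):
 *
 *     struct kvm_regs regs;
 *     int ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 *     if (ret < 0)
 *         return ret;  // already a negated errno value
 *
 * All three wrappers return -errno on failure instead of setting errno, so
 * callers must not inspect errno afterwards. */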
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}
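
/* Rationale: without KVM_CAP_SYNC_MMU the kernel keeps its own references
 * to the guest pages it has mapped, so a fork() followed by copy-on-write
 * in the parent could leave KVM and QEMU operating on different pages.
 * MADV_DONTFORK excludes guest RAM from child address spaces entirely,
 * sidestepping that hazard. */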
#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    TAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (env->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        TAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        TAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    TAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */