/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
};

static KVMState *kvm_state;

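/* Find a free slot in the fixed slot array.  Slots 8-11 are skipped because
 * KVM reserves them as private memory slots.  Returns NULL when every usable
 * slot is already taken. */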
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    return NULL;
}

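/* Return the slot that maps start_addr, or NULL if no slot covers it */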
static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr >= mem->start_addr &&
            start_addr < (mem->start_addr + mem->memory_size))
            return mem;
    }

    return NULL;
}

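/* Propagate one slot's current state to the kernel.  A memory_size of zero
 * tells the kernel to drop the slot. */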
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)phys_ram_base + slot->phys_offset;
    mem.flags = slot->flags;

    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

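/* Create a vcpu for 'env' and mmap the shared kvm_run area through which the
 * kernel reports exit information */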
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        ret = mmap_size;
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}

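/* Flush the register state of every vcpu back into the kernel */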
int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      target_phys_addr_t end_addr,
                                      unsigned flags,
                                      unsigned mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_slot(s, phys_addr);

    if (mem == NULL) {
        dprintf("invalid parameters %llx-%llx\n",
                (unsigned long long)phys_addr, (unsigned long long)end_addr);
        return -EINVAL;
    }

    flags = (mem->flags & ~mask) | flags;
    /* Nothing changed, no need to issue the ioctl */
    if (flags == mem->flags)
        return 0;

    mem->flags = flags;

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
{
    return kvm_dirty_pages_log_change(phys_addr, end_addr,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
{
    return kvm_dirty_pages_log_change(phys_addr, end_addr,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty(),
 * so it only ever marks pages dirty; it never clears them.
 *
 * @start_addr: start of logged region.  This is what we use to search the memslot
 * @end_addr: end of logged region.
 */
void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    KVMDirtyLog d;
    KVMSlot *mem = kvm_lookup_slot(s, start_addr);
    unsigned long alloc_size;
    ram_addr_t addr;
    target_phys_addr_t phys_addr = start_addr;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return;
    }
    dprintf("sync addr: %llx into %lx\n",
            (unsigned long long)start_addr, (unsigned long)mem->phys_offset);

    /* one bit per target page, rounded up to a whole number of longs since
     * the kernel fills the log in long-sized chunks */
    alloc_size = ((mem->memory_size >> TARGET_PAGE_BITS) +
                  sizeof(unsigned long) * 8 - 1) /
                 (sizeof(unsigned long) * 8) * sizeof(unsigned long);
    d.dirty_bitmap = qemu_mallocz(alloc_size);
    if (d.dirty_bitmap == NULL) {
        dprintf("Could not allocate dirty bitmap\n");
        return;
    }

    d.slot = mem->slot;
    dprintf("slot %d, phys_addr %llx, uaddr: %llx\n",
            d.slot, (unsigned long long)mem->start_addr,
            (unsigned long long)mem->phys_offset);

    if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
        dprintf("ioctl failed %d\n", errno);
        goto out;
    }

    phys_addr = start_addr;
    for (addr = mem->phys_offset; phys_addr < end_addr;
         phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
        unsigned nr = (phys_addr - start_addr) >> TARGET_PAGE_BITS;
        unsigned word = nr / (sizeof(*bitmap) * 8);
        unsigned bit = nr % (sizeof(*bitmap) * 8);

        if ((bitmap[word] >> bit) & 1)
            cpu_physical_memory_set_dirty(addr);
    }
out:
    qemu_free(d.dirty_bitmap);
}

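/* Ask the kernel to coalesce MMIO writes to [start, start + size): instead of
 * exiting to userspace on every write, KVM queues them in a ring that is
 * drained later by kvm_run_coalesced_mmio() */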
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

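/* Open /dev/kvm, check the API version and the capabilities we depend on,
 * and create the VM file descriptor */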
int kvm_init(int smp_cpus)
{
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1)
        return -EINVAL;

    s = qemu_mallocz(sizeof(KVMState));
    if (s == NULL)
        return -ENOMEM;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        ret = -errno;
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        ret = s->vmfd;
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use phys_ram_base
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
    if (ret <= 0) {
        if (ret == 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel without it. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION,
                    KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
    if (ret <= 0) {
        if (ret == 0)
            ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
                "Please upgrade to at least kvm-81.\n");
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
    if (ret > 0)
        s->coalesced_mmio = ret;
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

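/* Satisfy a KVM_EXIT_IO request: forward each element of the (possibly
 * repeated) port I/O to QEMU's handlers.  'data' points into the shared
 * kvm_run page. */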
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(env, port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(env, port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(env, port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(env, port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(env, port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(env, port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

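/* Drain the coalesced-MMIO ring, which lives s->coalesced_mmio pages into
 * the kvm_run mapping.  The kernel appends entries at 'last'; we consume
 * from 'first' and replay each buffered write as an ordinary physical
 * memory write. */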
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

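/* Outer vcpu loop: enter the guest with KVM_RUN and dispatch on the exit
 * reason.  Handlers return 1 to re-enter the guest and 0 to fall back out
 * to the main QEMU loop. */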
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        kvm_arch_pre_run(env, run);

        if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

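/* Register, update or split the slot covering [start_addr, start_addr + size).
 * An unassigned or MMIO range clears the existing slot; a change strictly
 * inside an existing slot splits it into prefix, new and suffix slots;
 * any other overlap is unsupported and aborts. */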
void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem;

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    mem = kvm_lookup_slot(s, start_addr);
    if (mem) {
        if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) {
            mem->memory_size = 0;
            mem->start_addr = start_addr;
            mem->phys_offset = 0;
            mem->flags = 0;

            kvm_set_user_memory_region(s, mem);
        } else if (start_addr >= mem->start_addr &&
                   (start_addr + size) <= (mem->start_addr +
                                           mem->memory_size)) {
            KVMSlot slot;
            target_phys_addr_t mem_start;
            ram_addr_t mem_size, mem_offset;

            /* Not splitting */
            if ((phys_offset - (start_addr - mem->start_addr)) ==
                mem->phys_offset)
                return;

            /* unregister whole slot */
            memcpy(&slot, mem, sizeof(slot));
            mem->memory_size = 0;
            kvm_set_user_memory_region(s, mem);

            /* register prefix slot */
            mem_start = slot.start_addr;
            mem_size = start_addr - slot.start_addr;
            mem_offset = slot.phys_offset;
            if (mem_size)
                kvm_set_phys_mem(mem_start, mem_size, mem_offset);

            /* register new slot */
            kvm_set_phys_mem(start_addr, size, phys_offset);

            /* register suffix slot */
            mem_start = start_addr + size;
            mem_offset += mem_size + size;
            mem_size = slot.memory_size - mem_size - size;
            if (mem_size)
                kvm_set_phys_mem(mem_start, mem_size, mem_offset);

            return;
        } else {
            printf("Registering overlapping slot\n");
            abort();
        }
    }

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    kvm_set_user_memory_region(s, mem);
    /* FIXME deal with errors */
}

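/* Thin vararg wrappers around ioctl() for the /dev/kvm, VM and vcpu file
 * descriptors; they convert the -1/errno failure convention into negative
 * errno return values */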
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

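/* Nonzero when the kernel keeps KVM's MMU synchronized with host address
 * space changes (KVM_CAP_SYNC_MMU), so madvise-based tricks on guest RAM
 * are safe */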
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    if (kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0)
        return 1;
#endif

    return 0;
}