Statistics
| Branch: | Revision:

root / kvm-all.c @ 1eec614b

History | View | Annotate | Download (16.3 kB)

1
/*
2
 * QEMU KVM support
3
 *
4
 * Copyright IBM, Corp. 2008
5
 *           Red Hat, Inc. 2008
6
 *
7
 * Authors:
8
 *  Anthony Liguori   <aliguori@us.ibm.com>
9
 *  Glauber Costa     <gcosta@redhat.com>
10
 *
11
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12
 * See the COPYING file in the top-level directory.
13
 *
14
 */
15

    
16
#include <sys/types.h>
17
#include <sys/ioctl.h>
18
#include <sys/mman.h>
19
#include <stdarg.h>
20

    
21
#include <linux/kvm.h>
22

    
23
#include "qemu-common.h"
24
#include "sysemu.h"
25
#include "kvm.h"
26

    
27
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
28
#define PAGE_SIZE TARGET_PAGE_SIZE
29

    
30
//#define DEBUG_KVM
31

    
32
/*
 * Debug tracing helper: forwards to fprintf(stderr, ...) when DEBUG_KVM is
 * defined, and expands to an empty statement (arguments are not evaluated)
 * otherwise.
 */
#ifndef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { } while (0)
#else
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
39

    
40
typedef struct KVMSlot
41
{
42
    target_phys_addr_t start_addr;
43
    ram_addr_t memory_size;
44
    ram_addr_t phys_offset;
45
    int slot;
46
    int flags;
47
} KVMSlot;
48

    
49
typedef struct kvm_dirty_log KVMDirtyLog;
50

    
51
int kvm_allowed = 0;
52

    
53
struct KVMState
54
{
55
    KVMSlot slots[32];
56
    int fd;
57
    int vmfd;
58
    int coalesced_mmio;
59
};
60

    
61
static KVMState *kvm_state;
62

    
63
/*
 * Find an unused entry in s->slots.
 *
 * An entry is unused when its memory_size is 0.  Indices 8..11 are never
 * handed out because they are KVM private memory slots.
 *
 * Returns the free slot, or NULL when every usable slot is taken.
 */
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int idx = 0;

    while (idx < ARRAY_SIZE(s->slots)) {
        KVMSlot *candidate = &s->slots[idx];
        int is_private = (idx >= 8 && idx < 12);

        if (!is_private && candidate->memory_size == 0)
            return candidate;

        idx++;
    }

    return NULL;
}
77

    
78
/*
 * Return the slot whose range [start_addr, start_addr + memory_size)
 * contains the given guest physical address, or NULL if there is none.
 * Unused slots (memory_size == 0) have an empty range and never match.
 */
static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr)
{
    KVMSlot *cur = s->slots;
    KVMSlot *past_last = s->slots + ARRAY_SIZE(s->slots);

    for (; cur != past_last; cur++) {
        if (start_addr < cur->start_addr)
            continue;
        if (start_addr >= (cur->start_addr + cur->memory_size))
            continue;
        return cur;
    }

    return NULL;
}
92

    
93
/*
 * Push the current contents of @slot to the kernel with
 * KVM_SET_USER_MEMORY_REGION.  The host address is phys_ram_base plus the
 * slot's phys_offset.
 *
 * Returns the result of kvm_vm_ioctl() (negative errno on failure).
 */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region region;

    region.flags = slot->flags;
    region.userspace_addr = (unsigned long)phys_ram_base + slot->phys_offset;
    region.memory_size = slot->memory_size;
    region.guest_phys_addr = slot->start_addr;
    region.slot = slot->slot;

    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &region);
}
105

    
106

    
107
/*
 * Create the KVM vcpu for @env and map its shared kvm_run area.
 *
 * On success env->kvm_fd, env->kvm_state and env->kvm_run are filled in and
 * the result of kvm_arch_init_vcpu() is returned.  On failure a negative
 * errno value is returned.
 */
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        /* ret still holds the (non-negative) vcpu fd at this point; replace
         * it with the actual error so the caller does not see success. */
        ret = mmap_size;
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}
143

    
144
int kvm_sync_vcpus(void)
145
{
146
    CPUState *env;
147

    
148
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
149
        int ret;
150

    
151
        ret = kvm_arch_put_registers(env);
152
        if (ret)
153
            return ret;
154
    }
155

    
156
    return 0;
157
}
158

    
159
/*
160
 * dirty pages logging control
161
 */
162
/*
 * Replace the bits selected by @mask in the flags of the slot containing
 * @phys_addr with @flags, and re-register the slot if anything changed.
 *
 * @end_addr is only used for the debug message.
 *
 * Returns -EINVAL when no slot contains @phys_addr, 0 when the flags are
 * already as requested, otherwise the result of the re-registration.
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, target_phys_addr_t end_addr,
                                      unsigned flags,
                                      unsigned mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_slot(s, phys_addr);
    unsigned updated;

    if (!mem) {
        dprintf("invalid parameters %llx-%llx\n", phys_addr, end_addr);
        return -EINVAL;
    }

    updated = (mem->flags & ~mask) | flags;

    /* Only bother the kernel when the flags really differ. */
    if (updated != mem->flags) {
        mem->flags = updated;
        return kvm_set_user_memory_region(s, mem);
    }

    return 0;
}
182

    
183
/*
 * Turn on KVM_MEM_LOG_DIRTY_PAGES for the slot containing @phys_addr.
 * Returns the result of kvm_dirty_pages_log_change().
 */
int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
{
    const unsigned log_bit = KVM_MEM_LOG_DIRTY_PAGES;

    return kvm_dirty_pages_log_change(phys_addr, end_addr, log_bit, log_bit);
}
189

    
190
/*
 * Turn off KVM_MEM_LOG_DIRTY_PAGES for the slot containing @phys_addr.
 * Returns the result of kvm_dirty_pages_log_change().
 */
int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
{
    const unsigned log_bit = KVM_MEM_LOG_DIRTY_PAGES;

    return kvm_dirty_pages_log_change(phys_addr, end_addr, 0, log_bit);
}
196

    
197
/**
198
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
199
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
200
 * This means all bits are set to dirty.
201
 *
202
 * @start_add: start of logged region. This is what we use to search the memslot
203
 * @end_addr: end of logged region.
204
 */
205
void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
206
{
207
    KVMState *s = kvm_state;
208
    KVMDirtyLog d;
209
    KVMSlot *mem = kvm_lookup_slot(s, start_addr);
210
    unsigned long alloc_size;
211
    ram_addr_t addr;
212
    target_phys_addr_t phys_addr = start_addr;
213

    
214
    dprintf("sync addr: %llx into %lx\n", start_addr, mem->phys_offset);
215
    if (mem == NULL) {
216
            fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
217
            return;
218
    }
219

    
220
    alloc_size = mem->memory_size >> TARGET_PAGE_BITS / sizeof(d.dirty_bitmap);
221
    d.dirty_bitmap = qemu_mallocz(alloc_size);
222

    
223
    d.slot = mem->slot;
224
    dprintf("slot %d, phys_addr %llx, uaddr: %llx\n",
225
            d.slot, mem->start_addr, mem->phys_offset);
226

    
227
    if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
228
        dprintf("ioctl failed %d\n", errno);
229
        goto out;
230
    }
231

    
232
    phys_addr = start_addr;
233
    for (addr = mem->phys_offset; phys_addr < end_addr; phys_addr+= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
234
        unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
235
        unsigned nr = (phys_addr - start_addr) >> TARGET_PAGE_BITS;
236
        unsigned word = nr / (sizeof(*bitmap) * 8);
237
        unsigned bit = nr % (sizeof(*bitmap) * 8);
238
        if ((bitmap[word] >> bit) & 1)
239
            cpu_physical_memory_set_dirty(addr);
240
    }
241
out:
242
    qemu_free(d.dirty_bitmap);
243
}
244

    
245
/*
 * Register [start, start + size) as a coalesced MMIO zone.
 *
 * Returns -ENOSYS when coalesced MMIO is not compiled in or not reported by
 * the kernel, otherwise the result of KVM_REGISTER_COALESCED_MMIO.
 */
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio)
        return -ENOSYS;

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
#else
    return -ENOSYS;
#endif
}
263

    
264
/*
 * Unregister the coalesced MMIO zone [start, start + size).
 *
 * Returns -ENOSYS when coalesced MMIO is not compiled in or not reported by
 * the kernel, otherwise the result of KVM_UNREGISTER_COALESCED_MMIO.
 */
int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio)
        return -ENOSYS;

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
#else
    return -ENOSYS;
#endif
}
282

    
283
int kvm_init(int smp_cpus)
284
{
285
    KVMState *s;
286
    int ret;
287
    int i;
288

    
289
    if (smp_cpus > 1)
290
        return -EINVAL;
291

    
292
    s = qemu_mallocz(sizeof(KVMState));
293

    
294
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
295
        s->slots[i].slot = i;
296

    
297
    s->vmfd = -1;
298
    s->fd = open("/dev/kvm", O_RDWR);
299
    if (s->fd == -1) {
300
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
301
        ret = -errno;
302
        goto err;
303
    }
304

    
305
    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
306
    if (ret < KVM_API_VERSION) {
307
        if (ret > 0)
308
            ret = -EINVAL;
309
        fprintf(stderr, "kvm version too old\n");
310
        goto err;
311
    }
312

    
313
    if (ret > KVM_API_VERSION) {
314
        ret = -EINVAL;
315
        fprintf(stderr, "kvm version not supported\n");
316
        goto err;
317
    }
318

    
319
    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
320
    if (s->vmfd < 0)
321
        goto err;
322

    
323
    /* initially, KVM allocated its own memory and we had to jump through
324
     * hooks to make phys_ram_base point to this.  Modern versions of KVM
325
     * just use a user allocated buffer so we can use phys_ram_base
326
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
327
     */
328
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
329
    if (ret <= 0) {
330
        if (ret == 0)
331
            ret = -EINVAL;
332
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
333
        goto err;
334
    }
335

    
336
    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
337
     * destroyed properly.  Since we rely on this capability, refuse to work
338
     * with any kernel without this capability. */
339
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION,
340
                    KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
341
    if (ret <= 0) {
342
        if (ret == 0)
343
            ret = -EINVAL;
344

    
345
        fprintf(stderr,
346
                "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
347
                "Please upgrade to at least kvm-81.\n");
348
        goto err;
349
    }
350

    
351
    s->coalesced_mmio = 0;
352
#ifdef KVM_CAP_COALESCED_MMIO
353
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
354
    if (ret > 0)
355
        s->coalesced_mmio = ret;
356
#endif
357

    
358
    ret = kvm_arch_init(s, smp_cpus);
359
    if (ret < 0)
360
        goto err;
361

    
362
    kvm_state = s;
363

    
364
    return 0;
365

    
366
err:
367
    if (s) {
368
        if (s->vmfd != -1)
369
            close(s->vmfd);
370
        if (s->fd != -1)
371
            close(s->fd);
372
    }
373
    qemu_free(s);
374

    
375
    return ret;
376
}
377

    
378
/*
 * Service a KVM_EXIT_IO request.
 *
 * @data points at @count consecutive items of @size bytes.  For
 * KVM_EXIT_IO_IN each item is filled from a port read; for any other
 * direction each item is written to the port.  Sizes other than 1, 2 and 4
 * perform no port access but still advance through the buffer.
 *
 * Always returns 1.
 */
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    uint8_t *cursor = data;
    int is_in = (direction == KVM_EXIT_IO_IN);
    int n;

    for (n = 0; n < count; n++, cursor += size) {
        if (size == 1) {
            if (is_in)
                stb_p(cursor, cpu_inb(env, port));
            else
                cpu_outb(env, port, ldub_p(cursor));
        } else if (size == 2) {
            if (is_in)
                stw_p(cursor, cpu_inw(env, port));
            else
                cpu_outw(env, port, lduw_p(cursor));
        } else if (size == 4) {
            if (is_in)
                stl_p(cursor, cpu_inl(env, port));
            else
                cpu_outl(env, port, ldl_p(cursor));
        }
    }

    return 1;
}
416

    
417
/*
 * Replay the writes queued in the coalesced MMIO ring.
 *
 * The ring is located s->coalesced_mmio pages past the start of @run.
 * Entries from ring->first up to ring->last are written with
 * cpu_physical_memory_write(), and ring->first is advanced after each one.
 * Does nothing when coalesced MMIO is unavailable.
 */
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_ring *ring;

    if (!s->coalesced_mmio)
        return;

    ring = (void *)((char *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE));

    for (;;) {
        struct kvm_coalesced_mmio *ent;

        if (ring->first == ring->last)
            break;

        ent = &ring->coalesced_mmio[ring->first];
        cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);

        /* FIXME smp_wmb() */
        ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
    }
#endif
}
437

    
438
/*
 * Run the guest on @env until something needs attention outside this loop.
 *
 * Each iteration calls KVM_RUN (bracketed by the arch pre/post hooks),
 * drains the coalesced MMIO ring and dispatches on the exit reason.  The
 * loop goes round again only while the handler result is positive.  It also
 * stops when CPU_INTERRUPT_EXIT is pending or when KVM_RUN was interrupted
 * (-EINTR / -EAGAIN).  Any other KVM_RUN error aborts the process.
 *
 * Before returning, a pending CPU_INTERRUPT_EXIT is cleared and turned into
 * EXCP_INTERRUPT.  Returns the last handler result (0 or negative).
 */
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    for (;;) {
        kvm_arch_pre_run(env, run);

        if (env->interrupt_request & CPU_INTERRUPT_EXIT) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EAGAIN || ret == -EINTR) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        /* Every case sets ret; a value <= 0 ends the loop below. */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            ret = 0;
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            ret = 0;
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            ret = 0;
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            ret = 0;
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
            ret = 0;
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }

        if (ret <= 0)
            break;
    }

    if (env->interrupt_request & CPU_INTERRUPT_EXIT) {
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}
522

    
523
void kvm_set_phys_mem(target_phys_addr_t start_addr,
524
                      ram_addr_t size,
525
                      ram_addr_t phys_offset)
526
{
527
    KVMState *s = kvm_state;
528
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
529
    KVMSlot *mem;
530

    
531
    /* KVM does not support read-only slots */
532
    phys_offset &= ~IO_MEM_ROM;
533

    
534
    mem = kvm_lookup_slot(s, start_addr);
535
    if (mem) {
536
        if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) {
537
            mem->memory_size = 0;
538
            mem->start_addr = start_addr;
539
            mem->phys_offset = 0;
540
            mem->flags = 0;
541

    
542
            kvm_set_user_memory_region(s, mem);
543
        } else if (start_addr >= mem->start_addr &&
544
                   (start_addr + size) <= (mem->start_addr +
545
                                           mem->memory_size)) {
546
            KVMSlot slot;
547
            target_phys_addr_t mem_start;
548
            ram_addr_t mem_size, mem_offset;
549

    
550
            /* Not splitting */
551
            if ((phys_offset - (start_addr - mem->start_addr)) == 
552
                mem->phys_offset)
553
                return;
554

    
555
            /* unregister whole slot */
556
            memcpy(&slot, mem, sizeof(slot));
557
            mem->memory_size = 0;
558
            kvm_set_user_memory_region(s, mem);
559

    
560
            /* register prefix slot */
561
            mem_start = slot.start_addr;
562
            mem_size = start_addr - slot.start_addr;
563
            mem_offset = slot.phys_offset;
564
            if (mem_size)
565
                kvm_set_phys_mem(mem_start, mem_size, mem_offset);
566

    
567
            /* register new slot */
568
            kvm_set_phys_mem(start_addr, size, phys_offset);
569

    
570
            /* register suffix slot */
571
            mem_start = start_addr + size;
572
            mem_offset += mem_size + size;
573
            mem_size = slot.memory_size - mem_size - size;
574
            if (mem_size)
575
                kvm_set_phys_mem(mem_start, mem_size, mem_offset);
576

    
577
            return;
578
        } else {
579
            printf("Registering overlapping slot\n");
580
            abort();
581
        }
582
    }
583
    /* KVM does not need to know about this memory */
584
    if (flags >= IO_MEM_UNASSIGNED)
585
        return;
586

    
587
    mem = kvm_alloc_slot(s);
588
    mem->memory_size = size;
589
    mem->start_addr = start_addr;
590
    mem->phys_offset = phys_offset;
591
    mem->flags = 0;
592

    
593
    kvm_set_user_memory_region(s, mem);
594
    /* FIXME deal with errors */
595
}
596

    
597
/*
 * Issue an ioctl on the /dev/kvm descriptor (s->fd).
 *
 * Exactly one variadic argument is read, as a void *, and passed through.
 * Returns the ioctl result, with a -1 failure converted to -errno.
 */
int kvm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->fd, type, param);

    return (rc == -1) ? -errno : rc;
}
613

    
614
/*
 * Issue an ioctl on the VM descriptor (s->vmfd).
 *
 * Exactly one variadic argument is read, as a void *, and passed through.
 * Returns the ioctl result, with a -1 failure converted to -errno.
 */
int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->vmfd, type, param);

    return (rc == -1) ? -errno : rc;
}
630

    
631
/*
 * Issue an ioctl on the vcpu descriptor (env->kvm_fd).
 *
 * Exactly one variadic argument is read, as a void *, and passed through.
 * Returns the ioctl result, with a -1 failure converted to -errno.
 */
int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(env->kvm_fd, type, param);

    return (rc == -1) ? -errno : rc;
}
647

    
648
/*
 * Report whether the kernel advertises KVM_CAP_SYNC_MMU.
 *
 * Returns 1 when the extension check yields a positive value, 0 otherwise
 * (including when the capability is not known at build time).
 */
int kvm_has_sync_mmu(void)
{
    int supported = 0;

#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    supported = (kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0);
#endif

    return supported;
}