Statistics
| Branch: | Revision:

root / kvm-all.c @ f65ed4c1

History | View | Annotate | Download (16.2 kB)

/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
15

    
16
#include <sys/types.h>
17
#include <sys/ioctl.h>
18
#include <sys/mman.h>
19
#include <stdarg.h>
20

    
21
#include <linux/kvm.h>
22

    
23
#include "qemu-common.h"
24
#include "sysemu.h"
25
#include "kvm.h"
26

    
27
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX, so provide it
 * here in terms of the target page size. */
#define PAGE_SIZE TARGET_PAGE_SIZE
29

    
30
//#define DEBUG_KVM
31

    
32
/*
 * Debug tracing helper.
 *
 * With DEBUG_KVM defined, dprintf() forwards its arguments to
 * fprintf(stderr, ...).  Without it, the macro expands to an empty
 * statement, so its arguments are dropped by the preprocessor and never
 * evaluated.
 */
#if defined(DEBUG_KVM)
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
39

    
40
typedef struct KVMSlot
41
{
42
    target_phys_addr_t start_addr;
43
    ram_addr_t memory_size;
44
    ram_addr_t phys_offset;
45
    int slot;
46
    int flags;
47
} KVMSlot;
48

    
49
/* Short local name for the kernel's dirty-log request structure. */
typedef struct kvm_dirty_log KVMDirtyLog;

/* NOTE(review): presumably non-zero when the use of KVM is enabled; the flag
 * is only defined here and never read in this file -- confirm with its users. */
int kvm_allowed = 0;
52

    
53
struct KVMState
54
{
55
    KVMSlot slots[32];
56
    int fd;
57
    int vmfd;
58
    int coalesced_mmio;
59
};
60

    
61
/* The KVM state used by every entry point here; set by kvm_init() on success. */
static KVMState *kvm_state;
62

    
63
/*
 * Find an unused entry in the slot table.
 *
 * An entry is unused when its memory_size is 0.  Indices 8..11 are never
 * handed out because they are KVM private memory slots.
 *
 * Returns the first unused entry, or NULL if every usable entry is taken.
 */
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int idx = 0;

    while (idx < ARRAY_SIZE(s->slots)) {
        KVMSlot *candidate = &s->slots[idx];
        int kvm_private = (idx >= 8 && idx < 12);

        if (!kvm_private && candidate->memory_size == 0) {
            return candidate;
        }
        idx++;
    }

    return NULL;
}
77

    
78
/*
 * Find the slot whose range [start_addr, start_addr + memory_size) contains
 * the given guest physical address.
 *
 * Unused slots have memory_size 0 and therefore never match.
 *
 * Returns the first matching slot, or NULL if no slot covers the address.
 */
static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr)
{
    KVMSlot *cur = s->slots;
    KVMSlot *const table_end = s->slots + ARRAY_SIZE(s->slots);

    for (; cur != table_end; cur++) {
        if (start_addr < cur->start_addr) {
            continue;
        }
        if (start_addr < (cur->start_addr + cur->memory_size)) {
            return cur;
        }
    }

    return NULL;
}
92

    
93
/*
 * Push the current contents of a slot to the kernel with
 * KVM_SET_USER_MEMORY_REGION.
 *
 * The userspace address is phys_ram_base plus the slot's phys_offset.
 *
 * Returns the result of kvm_vm_ioctl() (negative errno on failure).
 */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    unsigned long host_addr = (unsigned long)phys_ram_base + slot->phys_offset;
    struct kvm_userspace_memory_region region = {
        .slot = slot->slot,
        .flags = slot->flags,
        .guest_phys_addr = slot->start_addr,
        .memory_size = slot->memory_size,
        .userspace_addr = host_addr,
    };

    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &region);
}
105

    
106

    
107
/*
 * Create the KVM vcpu for @env and map its shared kvm_run area.
 *
 * On success env->kvm_fd, env->kvm_state and env->kvm_run are filled in and
 * the result of kvm_arch_init_vcpu() is returned.  On failure a negative
 * errno value is returned.
 */
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        /* ret still holds the non-negative vcpu fd at this point; replace it
         * with the error so the caller does not mistake this for success. */
        ret = mmap_size;
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}
143

    
144
/*
 * dirty pages logging control
 */
147
/*
 * Update the flag bits selected by @mask on the slot containing @phys_addr.
 *
 * The new flag word is (old & ~mask) | flags.  The kernel is only told about
 * the slot when the flag word actually changes.  @end_addr is used for the
 * debug message only.
 *
 * Returns 0 if nothing changed, -EINVAL if no slot contains @phys_addr, and
 * otherwise the result of kvm_set_user_memory_region().
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, target_phys_addr_t end_addr,
                                      unsigned flags,
                                      unsigned mask)
{
    KVMState *s = kvm_state;
    KVMSlot *slot = kvm_lookup_slot(s, phys_addr);
    unsigned updated;

    if (!slot) {
        dprintf("invalid parameters %llx-%llx\n", phys_addr, end_addr);
        return -EINVAL;
    }

    updated = (slot->flags & ~mask) | flags;
    if (updated != slot->flags) {
        slot->flags = updated;
        return kvm_set_user_memory_region(s, slot);
    }

    /* Nothing changed, no need to issue ioctl */
    return 0;
}
167

    
168
/*
 * Set KVM_MEM_LOG_DIRTY_PAGES on the slot containing @phys_addr.
 * Returns the result of kvm_dirty_pages_log_change().
 */
int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
{
    const unsigned log_bit = KVM_MEM_LOG_DIRTY_PAGES;

    return kvm_dirty_pages_log_change(phys_addr, end_addr, log_bit, log_bit);
}
174

    
175
/*
 * Clear KVM_MEM_LOG_DIRTY_PAGES on the slot containing @phys_addr.
 * Returns the result of kvm_dirty_pages_log_change().
 */
int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
{
    const unsigned log_bit = KVM_MEM_LOG_DIRTY_PAGES;

    return kvm_dirty_pages_log_change(phys_addr, end_addr, 0, log_bit);
}
181

    
182
/**
183
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
184
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
185
 * This means all bits are set to dirty.
186
 *
187
 * @start_add: start of logged region. This is what we use to search the memslot
188
 * @end_addr: end of logged region.
189
 */
190
void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
191
{
192
    KVMState *s = kvm_state;
193
    KVMDirtyLog d;
194
    KVMSlot *mem = kvm_lookup_slot(s, start_addr);
195
    unsigned long alloc_size;
196
    ram_addr_t addr;
197
    target_phys_addr_t phys_addr = start_addr;
198

    
199
    dprintf("sync addr: %llx into %lx\n", start_addr, mem->phys_offset);
200
    if (mem == NULL) {
201
            fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
202
            return;
203
    }
204

    
205
    alloc_size = mem->memory_size >> TARGET_PAGE_BITS / sizeof(d.dirty_bitmap);
206
    d.dirty_bitmap = qemu_mallocz(alloc_size);
207

    
208
    if (d.dirty_bitmap == NULL) {
209
        dprintf("Could not allocate dirty bitmap\n");
210
        return;
211
    }
212

    
213
    d.slot = mem->slot;
214
    dprintf("slot %d, phys_addr %llx, uaddr: %llx\n",
215
            d.slot, mem->start_addr, mem->phys_offset);
216

    
217
    if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
218
        dprintf("ioctl failed %d\n", errno);
219
        goto out;
220
    }
221

    
222
    phys_addr = start_addr;
223
    for (addr = mem->phys_offset; phys_addr < end_addr; phys_addr+= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
224
        unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
225
        unsigned nr = (phys_addr - start_addr) >> TARGET_PAGE_BITS;
226
        unsigned word = nr / (sizeof(*bitmap) * 8);
227
        unsigned bit = nr % (sizeof(*bitmap) * 8);
228
        if ((bitmap[word] >> bit) & 1)
229
            cpu_physical_memory_set_dirty(addr);
230
    }
231
out:
232
    qemu_free(d.dirty_bitmap);
233
}
234

    
235
/*
 * Register [start, start + size) as a coalesced MMIO zone.
 *
 * Returns -ENOSYS when KVM_CAP_COALESCED_MMIO is not known at build time or
 * the capability was not detected by kvm_init(); otherwise the result of the
 * KVM_REGISTER_COALESCED_MMIO ioctl.
 */
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
#else
    return -ENOSYS;
#endif
}
253

    
254
/*
 * Unregister [start, start + size) as a coalesced MMIO zone.
 *
 * Returns -ENOSYS when KVM_CAP_COALESCED_MMIO is not known at build time or
 * the capability was not detected by kvm_init(); otherwise the result of the
 * KVM_UNREGISTER_COALESCED_MMIO ioctl.
 */
int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
#else
    return -ENOSYS;
#endif
}
272

    
273
int kvm_init(int smp_cpus)
274
{
275
    KVMState *s;
276
    int ret;
277
    int i;
278

    
279
    if (smp_cpus > 1)
280
        return -EINVAL;
281

    
282
    s = qemu_mallocz(sizeof(KVMState));
283
    if (s == NULL)
284
        return -ENOMEM;
285

    
286
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
287
        s->slots[i].slot = i;
288

    
289
    s->vmfd = -1;
290
    s->fd = open("/dev/kvm", O_RDWR);
291
    if (s->fd == -1) {
292
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
293
        ret = -errno;
294
        goto err;
295
    }
296

    
297
    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
298
    if (ret < KVM_API_VERSION) {
299
        if (ret > 0)
300
            ret = -EINVAL;
301
        fprintf(stderr, "kvm version too old\n");
302
        goto err;
303
    }
304

    
305
    if (ret > KVM_API_VERSION) {
306
        ret = -EINVAL;
307
        fprintf(stderr, "kvm version not supported\n");
308
        goto err;
309
    }
310

    
311
    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
312
    if (s->vmfd < 0)
313
        goto err;
314

    
315
    /* initially, KVM allocated its own memory and we had to jump through
316
     * hooks to make phys_ram_base point to this.  Modern versions of KVM
317
     * just use a user allocated buffer so we can use phys_ram_base
318
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
319
     */
320
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
321
    if (ret <= 0) {
322
        if (ret == 0)
323
            ret = -EINVAL;
324
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
325
        goto err;
326
    }
327

    
328
    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
329
     * destroyed properly.  Since we rely on this capability, refuse to work
330
     * with any kernel without this capability. */
331
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION,
332
                    KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
333
    if (ret <= 0) {
334
        if (ret == 0)
335
            ret = -EINVAL;
336

    
337
        fprintf(stderr,
338
                "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
339
                "Please upgrade to at least kvm-81.\n");
340
        goto err;
341
    }
342

    
343
    s->coalesced_mmio = 0;
344
#ifdef KVM_CAP_COALESCED_MMIO
345
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
346
    if (ret > 0)
347
        s->coalesced_mmio = ret;
348
#endif
349

    
350
    ret = kvm_arch_init(s, smp_cpus);
351
    if (ret < 0)
352
        goto err;
353

    
354
    kvm_state = s;
355

    
356
    return 0;
357

    
358
err:
359
    if (s) {
360
        if (s->vmfd != -1)
361
            close(s->vmfd);
362
        if (s->fd != -1)
363
            close(s->fd);
364
    }
365
    qemu_free(s);
366

    
367
    return ret;
368
}
369

    
370
/*
 * Carry out a port I/O request of @count items, each @size bytes wide, using
 * the buffer at @data.
 *
 * For KVM_EXIT_IO_IN each item is read from @port and stored into the
 * buffer; for any other direction each item is loaded from the buffer and
 * written to @port.  Sizes other than 1, 2 and 4 transfer nothing but the
 * buffer position still advances by @size.
 *
 * Always returns 1.
 */
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    const int reading = (direction == KVM_EXIT_IO_IN);
    uint8_t *cursor = data;
    int n;

    for (n = 0; n < count; n++, cursor += size) {
        switch (size) {
        case 1:
            if (reading) {
                stb_p(cursor, cpu_inb(env, port));
            } else {
                cpu_outb(env, port, ldub_p(cursor));
            }
            break;
        case 2:
            if (reading) {
                stw_p(cursor, cpu_inw(env, port));
            } else {
                cpu_outw(env, port, lduw_p(cursor));
            }
            break;
        case 4:
            if (reading) {
                stl_p(cursor, cpu_inl(env, port));
            } else {
                cpu_outl(env, port, ldl_p(cursor));
            }
            break;
        }
    }

    return 1;
}
408

    
409
/*
 * Replay the MMIO writes queued in the coalesced MMIO ring.
 *
 * The ring is located s->coalesced_mmio target pages past @run.  Each entry
 * between ring->first and ring->last is written with
 * cpu_physical_memory_write() and then consumed by advancing ring->first.
 * Does nothing when coalesced MMIO is not compiled in or was not detected.
 */
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_ring *ring;

    if (!s->coalesced_mmio) {
        return;
    }

    ring = (void *)((uint8_t *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE));
    while (ring->first != ring->last) {
        struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[ring->first];

        cpu_physical_memory_write(entry->phys_addr, entry->data, entry->len);
        /* FIXME smp_wmb() */
        ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
    }
#endif
}
429

    
430
/*
 * Run the vcpu until an exit needs attention outside this loop.
 *
 * Each iteration calls kvm_arch_pre_run(), issues KVM_RUN, calls
 * kvm_arch_post_run(), drains the coalesced MMIO ring and dispatches on the
 * exit reason.  The loop continues only while the handler result is positive.
 * It stops early, with result 0, when CPU_INTERRUPT_EXIT is pending or when
 * KVM_RUN returns -EINTR or -EAGAIN.  Any other KVM_RUN failure aborts.
 *
 * Before returning, a pending CPU_INTERRUPT_EXIT is cleared and turned into
 * EXCP_INTERRUPT.
 *
 * Returns 0, or the non-positive value from kvm_arch_handle_exit().
 */
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    for (;;) {
        kvm_arch_pre_run(env, run);

        if (env->interrupt_request & CPU_INTERRUPT_EXIT) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        /* Default: leave the loop unless a handler asks to continue. */
        ret = 0;
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }

        if (ret <= 0) {
            break;
        }
    }

    if (env->interrupt_request & CPU_INTERRUPT_EXIT) {
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}
514

    
515
void kvm_set_phys_mem(target_phys_addr_t start_addr,
516
                      ram_addr_t size,
517
                      ram_addr_t phys_offset)
518
{
519
    KVMState *s = kvm_state;
520
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
521
    KVMSlot *mem;
522

    
523
    /* KVM does not support read-only slots */
524
    phys_offset &= ~IO_MEM_ROM;
525

    
526
    mem = kvm_lookup_slot(s, start_addr);
527
    if (mem) {
528
        if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) {
529
            mem->memory_size = 0;
530
            mem->start_addr = start_addr;
531
            mem->phys_offset = 0;
532
            mem->flags = 0;
533

    
534
            kvm_set_user_memory_region(s, mem);
535
        } else if (start_addr >= mem->start_addr &&
536
                   (start_addr + size) <= (mem->start_addr +
537
                                           mem->memory_size)) {
538
            KVMSlot slot;
539
            target_phys_addr_t mem_start;
540
            ram_addr_t mem_size, mem_offset;
541

    
542
            /* Not splitting */
543
            if ((phys_offset - (start_addr - mem->start_addr)) == 
544
                mem->phys_offset)
545
                return;
546

    
547
            /* unregister whole slot */
548
            memcpy(&slot, mem, sizeof(slot));
549
            mem->memory_size = 0;
550
            kvm_set_user_memory_region(s, mem);
551

    
552
            /* register prefix slot */
553
            mem_start = slot.start_addr;
554
            mem_size = start_addr - slot.start_addr;
555
            mem_offset = slot.phys_offset;
556
            if (mem_size)
557
                kvm_set_phys_mem(mem_start, mem_size, mem_offset);
558

    
559
            /* register new slot */
560
            kvm_set_phys_mem(start_addr, size, phys_offset);
561

    
562
            /* register suffix slot */
563
            mem_start = start_addr + size;
564
            mem_offset += mem_size + size;
565
            mem_size = slot.memory_size - mem_size - size;
566
            if (mem_size)
567
                kvm_set_phys_mem(mem_start, mem_size, mem_offset);
568

    
569
            return;
570
        } else {
571
            printf("Registering overlapping slot\n");
572
            abort();
573
        }
574
    }
575
    /* KVM does not need to know about this memory */
576
    if (flags >= IO_MEM_UNASSIGNED)
577
        return;
578

    
579
    mem = kvm_alloc_slot(s);
580
    mem->memory_size = size;
581
    mem->start_addr = start_addr;
582
    mem->phys_offset = phys_offset;
583
    mem->flags = 0;
584

    
585
    kvm_set_user_memory_region(s, mem);
586
    /* FIXME deal with errors */
587
}
588

    
589
/*
 * Issue an ioctl on the /dev/kvm descriptor (s->fd).
 *
 * Exactly one variadic argument is consumed, read as a pointer, and passed
 * through to ioctl().
 *
 * Returns the ioctl result, with a -1 failure translated to -errno.
 */
int kvm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *payload;
    int rc;

    va_start(args, type);
    payload = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->fd, type, payload);

    return (rc == -1) ? -errno : rc;
}
605

    
606
/*
 * Issue an ioctl on the VM descriptor (s->vmfd).
 *
 * Exactly one variadic argument is consumed, read as a pointer, and passed
 * through to ioctl().
 *
 * Returns the ioctl result, with a -1 failure translated to -errno.
 */
int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *payload;
    int rc;

    va_start(args, type);
    payload = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->vmfd, type, payload);

    return (rc == -1) ? -errno : rc;
}
622

    
623
/*
 * Issue an ioctl on the vcpu descriptor (env->kvm_fd).
 *
 * Exactly one variadic argument is consumed, read as a pointer, and passed
 * through to ioctl().
 *
 * Returns the ioctl result, with a -1 failure translated to -errno.
 */
int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    va_list args;
    void *payload;
    int rc;

    va_start(args, type);
    payload = va_arg(args, void *);
    va_end(args);

    rc = ioctl(env->kvm_fd, type, payload);

    return (rc == -1) ? -errno : rc;
}
639

    
640
/*
 * Report whether the kernel advertises KVM_CAP_SYNC_MMU.
 *
 * Returns 1 when the capability check yields a positive value, 0 otherwise
 * (including when the capability is unknown at build time).
 */
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0;
#else
    return 0;
#endif
}