Statistics
| Branch: | Revision:

root / target-i386 / kvm.c @ ea375f9a

History | View | Annotate | Download (32.2 kB)

1
/*
2
 * QEMU KVM support
3
 *
4
 * Copyright (C) 2006-2008 Qumranet Technologies
5
 * Copyright IBM, Corp. 2008
6
 *
7
 * Authors:
8
 *  Anthony Liguori   <aliguori@us.ibm.com>
9
 *
10
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
11
 * See the COPYING file in the top-level directory.
12
 *
13
 */
14

    
15
#include <sys/types.h>
16
#include <sys/ioctl.h>
17
#include <sys/mman.h>
18

    
19
#include <linux/kvm.h>
20

    
21
#include "qemu-common.h"
22
#include "sysemu.h"
23
#include "kvm.h"
24
#include "cpu.h"
25
#include "gdbstub.h"
26
#include "host-utils.h"
27
#include "hw/pc.h"
28
#include "ioport.h"
29

    
30
#ifdef CONFIG_KVM_PARA
31
#include <linux/kvm_para.h>
32
#endif
33
//
34
//#define DEBUG_KVM
35

    
36
#ifdef DEBUG_KVM
37
#define dprintf(fmt, ...) \
38
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
39
#else
40
#define dprintf(fmt, ...) \
41
    do { } while (0)
42
#endif
43

    
44
#define MSR_KVM_WALL_CLOCK  0x11
45
#define MSR_KVM_SYSTEM_TIME 0x12
46

    
47
#ifdef KVM_CAP_EXT_CPUID
48

    
49
/* Ask the kernel for its supported-CPUID table using a caller-chosen
 * capacity of 'max' entries.
 *
 * Returns a qemu_mallocz()ed kvm_cpuid2 (caller frees with qemu_free()),
 * or NULL when 'max' entries were not enough and the caller should retry
 * with a larger value.  Any other ioctl failure is fatal.
 *
 * Note: nent == max is also treated as overflow, since the table might
 * have been truncated to exactly fit. */
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *cpuid;
    int r, size;

    size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
    /* No cast needed in C: qemu_mallocz() returns void *. */
    cpuid = qemu_mallocz(size);
    cpuid->nent = max;
    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
    if (r == 0 && cpuid->nent >= max) {
        r = -E2BIG;
    }
    if (r < 0) {
        if (r == -E2BIG) {
            qemu_free(cpuid);
            return NULL;
        } else {
            fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                    strerror(-r));
            exit(1);
        }
    }
    return cpuid;
}
73

    
74
/* Query which bits of one CPUID leaf register the kernel can expose to
 * the guest.
 *
 * 'function' is the CPUID leaf, 'reg' one of R_EAX/R_EBX/R_ECX/R_EDX.
 * Returns the supported bit mask for that register, 0 if the leaf is not
 * listed by the kernel, or -1U when the kernel lacks KVM_CAP_EXT_CPUID
 * entirely (so callers' feature trimming becomes a no-op). */
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
{
    struct kvm_cpuid2 *cpuid;
    int i, max;
    uint32_t ret = 0;
    uint32_t cpuid_1_edx;

    if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
        return -1U;
    }

    /* The required table size is not known up front: keep doubling the
       requested capacity until try_get_cpuid() stops reporting overflow. */
    max = 1;
    while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
        max *= 2;
    }

    for (i = 0; i < cpuid->nent; ++i) {
        if (cpuid->entries[i].function == function) {
            switch (reg) {
            case R_EAX:
                ret = cpuid->entries[i].eax;
                break;
            case R_EBX:
                ret = cpuid->entries[i].ebx;
                break;
            case R_ECX:
                ret = cpuid->entries[i].ecx;
                break;
            case R_EDX:
                ret = cpuid->entries[i].edx;
                switch (function) {
                case 1:
                    /* KVM before 2.6.30 misreports the following features */
                    ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA;
                    break;
                case 0x80000001:
                    /* On Intel, kvm returns cpuid according to the Intel spec,
                     * so add missing bits according to the AMD spec:
                     */
                    /* 0xdfeff7ff masks the leaf-1 EDX bits that AMD also
                       mirrors into leaf 0x80000001 (Intel-only bits dropped). */
                    cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, R_EDX);
                    ret |= cpuid_1_edx & 0xdfeff7ff;
                    break;
                }
                break;
            }
        }
    }

    qemu_free(cpuid);

    return ret;
}
126

    
127
#else
128

    
129
/* Fallback when the headers lack KVM_CAP_EXT_CPUID: there is no way to
 * query the kernel, so report "all bits supported" (-1U), which makes
 * the callers' kvm_trim_features() calls keep every feature. */
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
{
    return -1U;
}
133

    
134
#endif
135

    
136
/* Clear every bit of *features that is not present in 'supported'.
 *
 * The original bit-by-bit loop over all 32 positions is exactly a
 * bitwise AND with the supported mask, so do that directly. */
static void kvm_trim_features(uint32_t *features, uint32_t supported)
{
    *features &= supported;
}
148

    
149
#ifdef CONFIG_KVM_PARA
150
/* Table mapping a KVM capability (probed with KVM_CHECK_EXTENSION) to the
 * paravirt CPUID feature bit it enables; terminated by a {-1, -1} sentinel.
 * Each entry is compiled in only when the host headers define the cap. */
struct kvm_para_features {
        int cap;      /* KVM_CAP_* value to probe on the kernel */
        int feature;  /* KVM_FEATURE_* bit number to advertise */
} para_features[] = {
#ifdef KVM_CAP_CLOCKSOURCE
        { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
#endif
#ifdef KVM_CAP_NOP_IO_DELAY
        { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
#endif
#ifdef KVM_CAP_PV_MMU
        { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
#endif
        { -1, -1 }
};
165

    
166
/* Build the paravirt feature bit mask for this vcpu by probing the kernel
 * for every capability listed in para_features[] (the trailing {-1, -1}
 * sentinel is excluded from the scan). */
static int get_para_features(CPUState *env)
{
        int features = 0;
        int idx;

        for (idx = 0; idx < ARRAY_SIZE(para_features) - 1; idx++) {
                int supported =
                        kvm_check_extension(env->kvm_state,
                                            para_features[idx].cap);
                if (supported)
                        features |= (1 << para_features[idx].feature);
        }

        return features;
}
177
#endif
178

    
179
/* Per-vcpu x86 initialization: trim the configured CPUID feature words to
 * what the kernel supports, then build the full CPUID table (including
 * the KVM paravirt leaves) and install it with KVM_SET_CPUID2.
 * Returns the KVM_SET_CPUID2 ioctl result (0 on success). */
int kvm_arch_init_vcpu(CPUState *env)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[100];
    } __attribute__((packed)) cpuid_data;
    uint32_t limit, i, j, cpuid_i;
    uint32_t unused;
    struct kvm_cpuid_entry2 *c;
#ifdef KVM_CPUID_SIGNATURE
    uint32_t signature[3];
#endif

    env->mp_state = KVM_MP_STATE_RUNNABLE;

    /* Drop configured features the kernel cannot virtualize. */
    kvm_trim_features(&env->cpuid_features,
        kvm_arch_get_supported_cpuid(env, 1, R_EDX));

    /* Preserve CPUID_EXT_HYPERVISOR across trimming: QEMU always exposes
       it even though the kernel does not list it as supported. */
    i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
    kvm_trim_features(&env->cpuid_ext_features,
        kvm_arch_get_supported_cpuid(env, 1, R_ECX));
    env->cpuid_ext_features |= i;

    kvm_trim_features(&env->cpuid_ext2_features,
        kvm_arch_get_supported_cpuid(env, 0x80000001, R_EDX));
    kvm_trim_features(&env->cpuid_ext3_features,
        kvm_arch_get_supported_cpuid(env, 0x80000001, R_ECX));

    cpuid_i = 0;

#ifdef CONFIG_KVM_PARA
    /* Paravirtualization CPUIDs */
    /* Leaf KVM_CPUID_SIGNATURE: the "KVMKVMKVM" vendor signature the
       guest uses to detect it is running on KVM. */
    memcpy(signature, "KVMKVMKVM\0\0\0", 12);
    c = &cpuid_data.entries[cpuid_i++];
    memset(c, 0, sizeof(*c));
    c->function = KVM_CPUID_SIGNATURE;
    c->eax = 0;
    c->ebx = signature[0];
    c->ecx = signature[1];
    c->edx = signature[2];

    /* Leaf KVM_CPUID_FEATURES: configured paravirt features, limited to
       what the running kernel actually offers. */
    c = &cpuid_data.entries[cpuid_i++];
    memset(c, 0, sizeof(*c));
    c->function = KVM_CPUID_FEATURES;
    c->eax = env->cpuid_kvm_features & get_para_features(env);
#endif

    /* Standard leaves: leaf 0 EAX gives the highest supported function. */
    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        c = &cpuid_data.entries[cpuid_i++];

        switch (i) {
        case 2: {
            /* Keep reading function 2 till all the input is received */
            int times;

            c->function = i;
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            /* Low byte of EAX tells how many times leaf 2 must be read. */
            times = c->eax & 0xff;

            for (j = 1; j < times; ++j) {
                c = &cpuid_data.entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 4:
        case 0xb:
        case 0xd:
            /* Index-dependent leaves: emit one entry per sub-index until
               the leaf-specific end marker is hit. */
            for (j = 0; ; j++) {
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0)
                    break;
                if (i == 0xb && !(c->ecx & 0xff00))
                    break;
                if (i == 0xd && c->eax == 0)
                    break;

                c = &cpuid_data.entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            break;
        }
    }
    /* Extended leaves: 0x80000000 EAX gives the highest extended function. */
    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        c = &cpuid_data.entries[cpuid_i++];

        c->function = i;
        c->flags = 0;
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
    }

    cpuid_data.cpuid.nent = cpuid_i;

    return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
}
290

    
291
/* Reset the per-vcpu injection bookkeeping on vcpu reset: clear any NMI
 * state and mark the exception/interrupt slots empty (-1 means "nothing
 * queued"). */
void kvm_arch_reset_vcpu(CPUState *env)
{
    env->nmi_injected = 0;
    env->nmi_pending = 0;
    env->exception_injected = -1;
    env->interrupt_injected = -1;
}
298

    
299
/* Report whether the kernel's MSR save/restore list contains MSR_STAR
 * (absent on some hosts).  The answer is probed once through
 * KVM_GET_MSR_INDEX_LIST and cached in a static for later calls.
 * Returns 1 if MSR_STAR must be transferred, 0 otherwise. */
static int kvm_has_msr_star(CPUState *env)
{
    static int has_msr_star;    /* 0: not probed yet, -1: absent, 1: present */
    int ret;

    /* first time */
    if (has_msr_star == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        has_msr_star = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore.  A zero-sized query fails with -E2BIG but fills
         * in the required nmsrs count. */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG) {
            return 0;
        }
        /* Old kernel modules had a bug and could write beyond the provided
           memory. Allocate at least a safe amount of 1K. */
        kvm_msr_list = qemu_mallocz(MAX(1024, sizeof(msr_list) +
                                              msr_list.nmsrs *
                                              sizeof(msr_list.indices[0])));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            int i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                }
            }
        }

        /* Memory from qemu_mallocz() must be released with qemu_free(),
         * not plain free(), to stay paired with QEMU's allocator. */
        qemu_free(kvm_msr_list);
    }

    if (has_msr_star == 1)
        return 1;
    return 0;
}
343

    
344
/* One-time x86 VM setup: place the TSS KVM needs for vm86 emulation just
 * below the BIOS and reserve that range in the guest's e820 map.
 * 'smp_cpus' is unused here.  Returns 0 on success, a negative error or
 * 0 from the capability probe on failure. */
int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
     * versions of KVM just assumed that it would be at the end of physical
     * memory but that doesn't work with more than 4GB of memory.  We simply
     * refuse to work with those older versions of KVM. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
    if (ret <= 0) {
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
        return ret;
    }

    /* this address is 3 pages before the bios, and the bios should present
     * as unavaible memory.  FIXME, need to ensure the e820 map deals with
     * this?
     */
    /*
     * Tell fw_cfg to notify the BIOS to reserve the range.
     */
    /* NOTE(review): the reservation covers 4 pages from 0xfffbc000 while
     * the TSS is set at 0xfffbd000 (the upper 3 of them) -- the extra
     * page looks like deliberate headroom; confirm against seabios. */
    if (e820_add_entry(0xfffbc000, 0x4000, E820_RESERVED) < 0) {
        perror("e820_add_entry() table is full");
        exit(1);
    }
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
}
373
                    
374
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
375
{
376
    lhs->selector = rhs->selector;
377
    lhs->base = rhs->base;
378
    lhs->limit = rhs->limit;
379
    lhs->type = 3;
380
    lhs->present = 1;
381
    lhs->dpl = 3;
382
    lhs->db = 0;
383
    lhs->s = 1;
384
    lhs->l = 0;
385
    lhs->g = 0;
386
    lhs->avl = 0;
387
    lhs->unusable = 0;
388
}
389

    
390
static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
391
{
392
    unsigned flags = rhs->flags;
393
    lhs->selector = rhs->selector;
394
    lhs->base = rhs->base;
395
    lhs->limit = rhs->limit;
396
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
397
    lhs->present = (flags & DESC_P_MASK) != 0;
398
    lhs->dpl = rhs->selector & 3;
399
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
400
    lhs->s = (flags & DESC_S_MASK) != 0;
401
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
402
    lhs->g = (flags & DESC_G_MASK) != 0;
403
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
404
    lhs->unusable = 0;
405
}
406

    
407
/* Translate a kvm_segment back into a QEMU segment cache entry, repacking
 * KVM's discrete attribute fields into the single descriptor flags word. */
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    unsigned flags;

    lhs->selector = rhs->selector;
    lhs->base     = rhs->base;
    lhs->limit    = rhs->limit;

    flags  = rhs->type << DESC_TYPE_SHIFT;
    flags |= rhs->present * DESC_P_MASK;
    flags |= rhs->dpl << DESC_DPL_SHIFT;
    flags |= rhs->db << DESC_B_SHIFT;
    flags |= rhs->s * DESC_S_MASK;
    flags |= rhs->l << DESC_L_SHIFT;
    flags |= rhs->g * DESC_G_MASK;
    flags |= rhs->avl * DESC_AVL_MASK;
    lhs->flags = flags;
}
422

    
423
/* Copy one register value: QEMU -> KVM when 'set' is non-zero,
 * KVM -> QEMU otherwise. */
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (!set) {
        *qemu_reg = *kvm_reg;
    } else {
        *kvm_reg = *qemu_reg;
    }
}
430

    
431
/* Transfer the general-purpose register file, rflags and rip between
 * CPUState and the kernel, in the direction chosen by 'set' (non-zero:
 * QEMU -> kernel via KVM_SET_REGS; zero: kernel -> QEMU via KVM_GET_REGS).
 * Returns 0 on success or the negative ioctl error. */
static int kvm_getput_regs(CPUState *env, int set)
{
    struct kvm_regs regs;
    int ret = 0;

    /* When reading, fetch the kernel copy first so the per-register
       transfers below have valid source data. */
    if (!set) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
        if (ret < 0)
            return ret;
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    /* When writing, push the assembled struct to the kernel. */
    if (set)
        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);

    return ret;
}
469

    
470
/* Pack QEMU's x87/SSE state into a struct kvm_fpu and push it to the vcpu
 * with KVM_SET_FPU.  Returns the ioctl result (0 on success). */
static int kvm_put_fpu(CPUState *env)
{
    struct kvm_fpu fpu;
    int i;

    memset(&fpu, 0, sizeof fpu);
    /* Bits 11-13 of the x87 status word hold TOP (stack-top index), which
       QEMU tracks separately in fpstt; merge it back into fsw. */
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;
    /* ftwx is the abridged tag word: 1 bit per register, 1 = valid.
       QEMU's fptags has the opposite sense (1 = empty), hence the '!'. */
    for (i = 0; i < 8; ++i)
        fpu.ftwx |= (!env->fptags[i]) << i;
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
    fpu.mxcsr = env->mxcsr;

    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
}
487

    
488
/* Push segment registers, descriptor tables, control registers, TPR,
 * APIC base and EFER to the vcpu with KVM_SET_SREGS.  Also encodes the
 * single pending-IRQ slot into the kernel's interrupt bitmap.
 * Returns the ioctl result (0 on success). */
static int kvm_put_sregs(CPUState *env)
{
    struct kvm_sregs sregs;

    /* QEMU keeps at most one pending injected IRQ (interrupt_injected,
       -1 for none); expand it into the kernel's per-vector bitmap. */
    memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
    if (env->interrupt_injected >= 0) {
        sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
                (uint64_t)1 << (env->interrupt_injected % 64);
    }

    /* In virtual-8086 mode segments carry fixed attributes; otherwise
       unpack the cached descriptor flags. */
    if ((env->eflags & VM_MASK)) {
            set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
            set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
            set_v8086_seg(&sregs.es, &env->segs[R_ES]);
            set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
            set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
            set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
            set_seg(&sregs.cs, &env->segs[R_CS]);
            set_seg(&sregs.ds, &env->segs[R_DS]);
            set_seg(&sregs.es, &env->segs[R_ES]);
            set_seg(&sregs.fs, &env->segs[R_FS]);
            set_seg(&sregs.gs, &env->segs[R_GS]);
            set_seg(&sregs.ss, &env->segs[R_SS]);

            if (env->cr[0] & CR0_PE_MASK) {
                /* force ss cpl to cs cpl */
                sregs.ss.selector = (sregs.ss.selector & ~3) |
                        (sregs.cs.selector & 3);
                sregs.ss.dpl = sregs.ss.selector & 3;
            }
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    /* CR8 mirrors the APIC task-priority register. */
    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
}
541

    
542
static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
543
                              uint32_t index, uint64_t value)
544
{
545
    entry->index = index;
546
    entry->data = value;
547
}
548

    
549
/* Write the guest-visible MSRs from CPUState to the vcpu in a single
 * KVM_SET_MSRS batch.  Returns the ioctl result. */
static int kvm_put_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int n = 0;

    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    /* MSR_STAR is not saved/restored on all hosts; include it only when
       the kernel lists it (see kvm_has_msr_star). */
    if (kvm_has_msr_star(env))
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
    kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
#endif
    /* KVM paravirt clock MSRs (defined at the top of this file). */
    kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,  env->system_time_msr);
    kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK,  env->wall_clock_msr);

    msr_data.info.nmsrs = n;

    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);

}
579

    
580

    
581
/* Fetch the vcpu's x87/SSE state with KVM_GET_FPU and unpack it into
 * CPUState (inverse of kvm_put_fpu).  Returns 0 on success or the
 * negative ioctl error. */
static int kvm_get_fpu(CPUState *env)
{
    struct kvm_fpu fpu;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);
    if (ret < 0)
        return ret;

    /* Split TOP (fsw bits 11-13) out into fpstt; QEMU tracks it apart
       from the rest of the status word. */
    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    /* ftwx: 1 = valid register; QEMU's fptags: 1 = empty, hence '!'. */
    for (i = 0; i < 8; ++i)
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
    env->mxcsr = fpu.mxcsr;

    return 0;
}
601

    
602
/* Fetch segment/descriptor-table/control-register state with
 * KVM_GET_SREGS and mirror it into CPUState, then recompute the derived
 * hflags word QEMU's translator depends on.  Returns 0 on success or the
 * negative ioctl error. */
static int kvm_get_sregs(CPUState *env)
{
    struct kvm_sregs sregs;
    uint32_t hflags;
    int bit, i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret < 0)
        return ret;

    /* There can only be one pending IRQ set in the bitmap at a time, so try
       to find it and save its number instead (-1 for none). */
    env->interrupt_injected = -1;
    for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
        if (sregs.interrupt_bitmap[i]) {
            bit = ctz64(sregs.interrupt_bitmap[i]);
            env->interrupt_injected = i * 64 + bit;
            break;
        }
    }

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
    //cpu_set_apic_tpr(env, sregs.cr8);

/* Bits of env->hflags that are recomputed below; everything else is
   carried over unchanged from the previous value. */
#define HFLAG_COPY_MASK ~( \
                        HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
                        HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
                        HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
                        HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    /* Rebuild the derived flags from CS attributes, CR0/CR4, EFLAGS and
       EFER, mirroring what the TCG path maintains incrementally. */
    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
            (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
            (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    /* 64-bit CS (LMA + CS.L) implies 32-bit operand/stack defaults too;
       otherwise derive CS32/SS32 from the descriptor B bits and decide
       whether non-zero segment bases force address-size additions. */
    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(hflags & HF_CS32_MASK)) {
                hflags |= HF_ADDSEG_MASK;
            } else {
                hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
    }
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;

    return 0;
}
690

    
691
/* Read the guest-visible MSRs from the vcpu in one KVM_GET_MSRS batch and
 * scatter the returned values into CPUState.  Returns 0 on success or the
 * negative ioctl error. */
static int kvm_get_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int ret, i, n;

    /* Build the list of indices to query (values are filled in by KVM);
       mirrors the set written by kvm_put_msrs(). */
    n = 0;
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env))
        msrs[n++].index = MSR_STAR;
    msrs[n++].index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    msrs[n++].index = MSR_CSTAR;
    msrs[n++].index = MSR_KERNELGSBASE;
    msrs[n++].index = MSR_FMASK;
    msrs[n++].index = MSR_LSTAR;
#endif
    msrs[n++].index = MSR_KVM_SYSTEM_TIME;
    msrs[n++].index = MSR_KVM_WALL_CLOCK;

    msr_data.info.nmsrs = n;
    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
    if (ret < 0)
        return ret;

    /* On success the ioctl returns the number of entries actually read;
       dispatch each returned value by index. */
    for (i = 0; i < ret; i++) {
        switch (msrs[i].index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].data;
            break;
        case MSR_STAR:
            env->star = msrs[i].data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = msrs[i].data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].data;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].data;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].data;
            break;
#endif
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        case MSR_KVM_SYSTEM_TIME:
            env->system_time_msr = msrs[i].data;
            break;
        case MSR_KVM_WALL_CLOCK:
            env->wall_clock_msr = msrs[i].data;
            break;
        }
    }

    return 0;
}
764

    
765
/* Push QEMU's view of the vcpu run state (runnable/halted/...) to the
 * kernel via KVM_SET_MP_STATE. */
static int kvm_put_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state;

    mp_state.mp_state = env->mp_state;
    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
}
771

    
772
/* Read the vcpu run state from the kernel (KVM_GET_MP_STATE) into
 * env->mp_state.  Returns 0 on success or the negative ioctl error. */
static int kvm_get_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state;
    int ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);

    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    return 0;
}
784

    
785
/* Forward pending exception/interrupt/NMI injection state and the SIPI
 * vector to the kernel via KVM_SET_VCPU_EVENTS.  Compiles to (or returns)
 * a no-op 0 when the headers or the running kernel lack the capability. */
static int kvm_put_vcpu_events(CPUState *env)
{
#ifdef KVM_CAP_VCPU_EVENTS
    struct kvm_vcpu_events events;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    /* exception_injected/interrupt_injected use -1 for "none", hence the
       >= 0 tests for the 'injected' flags. */
    events.exception.injected = (env->exception_injected >= 0);
    events.exception.nr = env->exception_injected;
    events.exception.has_error_code = env->has_error_code;
    events.exception.error_code = env->error_code;

    events.interrupt.injected = (env->interrupt_injected >= 0);
    events.interrupt.nr = env->interrupt_injected;
    events.interrupt.soft = env->soft_interrupt;

    events.nmi.injected = env->nmi_injected;
    events.nmi.pending = env->nmi_pending;
    events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);

    events.sipi_vector = env->sipi_vector;

    /* Mark which optional fields of the struct the kernel should honor. */
    events.flags =
        KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;

    return kvm_vcpu_ioctl(env, KVM_SET_VCPU_EVENTS, &events);
#else
    return 0;
#endif
}
817

    
818
/* Fetch pending exception/interrupt/NMI injection state and the SIPI
 * vector from the kernel via KVM_GET_VCPU_EVENTS (inverse of
 * kvm_put_vcpu_events).  Returns 0, or the negative ioctl error; also a
 * no-op 0 without the capability. */
static int kvm_get_vcpu_events(CPUState *env)
{
#ifdef KVM_CAP_VCPU_EVENTS
    struct kvm_vcpu_events events;
    int ret;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    ret = kvm_vcpu_ioctl(env, KVM_GET_VCPU_EVENTS, &events);
    if (ret < 0) {
       return ret;
    }
    /* -1 encodes "nothing pending" in QEMU's injection slots. */
    env->exception_injected =
       events.exception.injected ? events.exception.nr : -1;
    env->has_error_code = events.exception.has_error_code;
    env->error_code = events.exception.error_code;

    env->interrupt_injected =
        events.interrupt.injected ? events.interrupt.nr : -1;
    env->soft_interrupt = events.interrupt.soft;

    env->nmi_injected = events.nmi.injected;
    env->nmi_pending = events.nmi.pending;
    /* NMI-blocked state lives in hflags2. */
    if (events.nmi.masked) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }

    env->sipi_vector = events.sipi_vector;
#endif

    return 0;
}
854

    
855
/* Work around older kernels' guest-debug shortcomings; called last in
 * kvm_arch_put_registers because it may rewrite debug state set earlier.
 * Returns 0 or the error from kvm_update_guest_debug(). */
static int kvm_guest_debug_workarounds(CPUState *env)
{
    int ret = 0;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    unsigned long reinject_trap = 0;

    /* Without SET_VCPU_EVENTS a pending #DB (vector 1) or #BP (vector 3)
       cannot be injected directly; translate it into the corresponding
       SET_GUEST_DEBUG reinjection flag and clear the slot. */
    if (!kvm_has_vcpu_events()) {
        if (env->exception_injected == 1) {
            reinject_trap = KVM_GUESTDBG_INJECT_DB;
        } else if (env->exception_injected == 3) {
            reinject_trap = KVM_GUESTDBG_INJECT_BP;
        }
        env->exception_injected = -1;
    }

    /*
     * Kernels before KVM_CAP_X86_ROBUST_SINGLESTEP overwrote flags.TF
     * injected via SET_GUEST_DEBUG while updating GP regs. Work around this
     * by updating the debug state once again if single-stepping is on.
     * Another reason to call kvm_update_guest_debug here is a pending debug
     * trap raise by the guest. On kernels without SET_VCPU_EVENTS we have to
     * reinject them via SET_GUEST_DEBUG.
     */
    if (reinject_trap ||
        (!kvm_has_robust_singlestep() && env->singlestep_enabled)) {
        ret = kvm_update_guest_debug(env, reinject_trap);
    }
#endif /* KVM_CAP_SET_GUEST_DEBUG */
    return ret;
}
885

    
886
/* Push the complete vcpu state from QEMU into the kernel.  The writers
 * run in a fixed order and the first failure aborts the sequence; the
 * guest-debug workaround must run last because it may rewrite debug
 * state established by the earlier steps.  'level' is currently unused. */
int kvm_arch_put_registers(CPUState *env, int level)
{
    static int (*const put_fn[])(CPUState *) = {
        kvm_put_fpu,
        kvm_put_sregs,
        kvm_put_msrs,
        kvm_put_mp_state,
        kvm_put_vcpu_events,
        kvm_guest_debug_workarounds,    /* must be last */
    };
    int rc, i;

    rc = kvm_getput_regs(env, 1);
    if (rc < 0) {
        return rc;
    }

    for (i = 0; i < ARRAY_SIZE(put_fn); i++) {
        rc = put_fn[i](env);
        if (rc < 0) {
            return rc;
        }
    }

    return 0;
}
921

    
922
/* Pull the complete vcpu state from the kernel into QEMU.  The readers
 * run in a fixed order and the first failure aborts the sequence. */
int kvm_arch_get_registers(CPUState *env)
{
    static int (*const get_fn[])(CPUState *) = {
        kvm_get_fpu,
        kvm_get_sregs,
        kvm_get_msrs,
        kvm_get_mp_state,
        kvm_get_vcpu_events,
    };
    int rc, i;

    rc = kvm_getput_regs(env, 0);
    if (rc < 0) {
        return rc;
    }

    for (i = 0; i < ARRAY_SIZE(get_fn); i++) {
        rc = get_fn[i](env);
        if (rc < 0) {
            return rc;
        }
    }

    return 0;
}
952

    
953
/* Prepare the shared kvm_run area just before entering the guest:
 * inject a pending PIC interrupt if the guest can take one, request an
 * interrupt-window exit otherwise, and refresh CR8 (TPR).  Always
 * returns 0. */
int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
    /* Try to inject an interrupt if the guest can accept it */
    if (run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        int irq;

        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            struct kvm_interrupt intr;
            intr.irq = irq;
            /* FIXME: errors */
            dprintf("injected interrupt %d\n", irq);
            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((env->interrupt_request & CPU_INTERRUPT_HARD))
        run->request_interrupt_window = 1;
    else
        run->request_interrupt_window = 0;

    dprintf("setting tpr\n");
    /* Keep the in-kernel CR8 in sync with the APIC task priority. */
    run->cr8 = cpu_get_apic_tpr(env);

    return 0;
}
986

    
987
/*
 * Fold state reported by KVM after a guest run back into env: the
 * interrupt-enable flag, the task-priority register and the APIC base.
 */
int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
    /* Mirror KVM's view of the IF flag into eflags. */
    env->eflags = run->if_flag ? (env->eflags | IF_MASK)
                               : (env->eflags & ~IF_MASK);

    cpu_set_apic_tpr(env, run->cr8);
    cpu_set_apic_base(env, run->apic_base);

    return 0;
}

/*
 * Handle a KVM_EXIT_HLT: either keep running because something can wake
 * the vcpu right away (returns 1), or mark it halted (returns 0).
 */
static int kvm_handle_halt(CPUState *env)
{
    int deliverable_irq = (env->interrupt_request & CPU_INTERRUPT_HARD) &&
                          (env->eflags & IF_MASK);

    /* A deliverable external interrupt or a pending NMI ends the halt. */
    if (deliverable_irq || (env->interrupt_request & CPU_INTERRUPT_NMI)) {
        return 1;
    }

    env->halted = 1;
    env->exception_index = EXCP_HLT;
    return 0;
}

/*
 * Dispatch x86-specific KVM exit reasons.  Currently only HLT needs
 * arch handling; every other reason falls through with ret == 0 so the
 * generic exit handler takes over.
 */
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    int ret = 0;

    if (run->exit_reason == KVM_EXIT_HLT) {
        dprintf("handle_hlt\n");
        ret = kvm_handle_halt(env);
    }

    return ret;
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
/*
 * Plant a software breakpoint: save the original byte at bp->pc into
 * bp->saved_insn, then overwrite it with an int3 (0xcc) opcode.
 * Returns 0 on success, -EINVAL if guest memory cannot be accessed.
 */
int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    /* Save the original byte first so removal can restore it. */
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0)) {
        return -EINVAL;
    }
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) {
        return -EINVAL;
    }
    return 0;
}

/*
 * Remove a software breakpoint: verify the int3 we planted is still at
 * bp->pc, then restore the saved original byte.  Returns 0 on success,
 * -EINVAL on access failure or if the site no longer holds an int3.
 */
int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
{
    uint8_t byte;

    if (cpu_memory_rw_debug(env, bp->pc, &byte, 1, 0)) {
        return -EINVAL;
    }
    /* Only restore if our int3 opcode is still in place. */
    if (byte != 0xcc) {
        return -EINVAL;
    }
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
        return -EINVAL;
    }
    return 0;
}

/* Bookkeeping for the x86 hardware debug registers: one slot per
 * DR0..DR3 breakpoint/watchpoint that has been registered. */
static struct {
    target_ulong addr;  /* guest virtual address being watched */
    int len;            /* 1, 2, 4 or 8; always 1 for execute breakpoints */
    int type;           /* GDB_BREAKPOINT_HW / GDB_WATCHPOINT_* */
} hw_breakpoint[4];

/* Number of slots of hw_breakpoint[] currently in use. */
static int nb_hw_breakpoint;

/*
 * Look up a registered hardware breakpoint by address and type,
 * matching any length when len == -1.  Returns the slot index in
 * hw_breakpoint[], or -1 if no entry matches.
 */
static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int i;

    for (i = 0; i < nb_hw_breakpoint; i++) {
        if (hw_breakpoint[i].addr != addr || hw_breakpoint[i].type != type) {
            continue;
        }
        if (len == -1 || hw_breakpoint[i].len == len) {
            return i;
        }
    }
    return -1;
}

/*
 * Register a hardware breakpoint or watchpoint for later transfer to
 * the guest debug registers by kvm_arch_update_guest_debug().
 *
 * Returns 0 on success, -EINVAL for a bad watchpoint length or
 * misaligned address, -ENOSYS for an unsupported type, -ENOBUFS when
 * all debug-register slots are taken, and -EEXIST for a duplicate.
 */
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        /* Execute breakpoints always use length 1. */
        len = 1;
        break;
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        switch (len) {
        case 1:
            break;
        case 2:
        case 4:
        case 8:
            /* Watchpoints must be naturally aligned. */
            if (addr & (len - 1)) {
                return -EINVAL;
            }
            break;
        default:
            return -EINVAL;
        }
        break;
    default:
        return -ENOSYS;
    }

    /* Derive the slot limit from the table itself instead of a magic 4. */
    if (nb_hw_breakpoint == sizeof(hw_breakpoint) / sizeof(hw_breakpoint[0])) {
        return -ENOBUFS;
    }

    if (find_hw_breakpoint(addr, len, type) >= 0) {
        return -EEXIST;
    }

    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].type = type;
    nb_hw_breakpoint++;

    return 0;
}

/*
 * Unregister a hardware breakpoint/watchpoint previously added with
 * kvm_arch_insert_hw_breakpoint().  Returns 0 on success or -ENOENT if
 * no matching entry exists.
 */
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int slot;

    slot = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len,
                              type);
    if (slot < 0) {
        return -ENOENT;
    }

    /* Move the last entry into the freed slot to keep the table dense. */
    nb_hw_breakpoint--;
    hw_breakpoint[slot] = hw_breakpoint[nb_hw_breakpoint];

    return 0;
}

/* Drop every registered hardware breakpoint/watchpoint slot. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = 0;
}

/* Scratch watchpoint record handed to the common debug code via
 * cpu_single_env->watchpoint_hit; filled in by kvm_arch_debug(). */
static CPUWatchpoint hw_watchpoint;

/*
 * Handle a KVM debug exit described by arch_info.
 *
 * Returns nonzero if the event was caused by a breakpoint, watchpoint
 * or single-step that QEMU's debugger owns, so the exit should be
 * reported to the debug stub.  Returns 0 when the exception belongs to
 * the guest, in which case it is queued for re-injection.
 */
int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
{
    int handle = 0;
    int n;

    /* Vector 1 is the x86 debug exception (#DB); any other vector is
     * matched against our software breakpoint list below. */
    if (arch_info->exception == 1) {
        /* DR6 bit 14 (BS) indicates a single-step trap. */
        if (arch_info->dr6 & (1 << 14)) {
            if (cpu_single_env->singlestep_enabled)
                handle = 1;
        } else {
            /* DR6 bits 0-3 flag which debug register(s) triggered. */
            for (n = 0; n < 4; n++)
                if (arch_info->dr6 & (1 << n))
                    /* The DR7 R/W field for DRn encodes the trigger
                     * type; values match the type_code[] table in
                     * kvm_arch_update_guest_debug(). */
                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
                    case 0x0: /* execute breakpoint */
                        handle = 1;
                        break;
                    case 0x1: /* data-write watchpoint */
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_WRITE;
                        break;
                    case 0x3: /* data read/write watchpoint */
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_ACCESS;
                        break;
                    }
        }
    } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc))
        handle = 1;

    if (!handle) {
        /* Not ours: sync register state and queue the exception so it
         * is re-injected into the guest on the next vcpu entry. */
        cpu_synchronize_state(cpu_single_env);
        assert(cpu_single_env->exception_injected == -1);

        cpu_single_env->exception_injected = arch_info->exception;
        cpu_single_env->has_error_code = 0;
    }

    return handle;
}

/*
 * Fill the kvm_guest_debug control block from QEMU's breakpoint state:
 * enable software breakpoints if any are active, and program the
 * DR0..DR3/DR7 shadow registers from the hw_breakpoint[] table.
 */
void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
{
    /* GDB breakpoint type -> DR7 R/W field encoding. */
    const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    /* Watchpoint length -> DR7 LEN field encoding. */
    const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int i;

    if (kvm_sw_breakpoints_active(env)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    if (nb_hw_breakpoint <= 0) {
        return;
    }

    dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
    dbg->arch.debugreg[7] = 0x0600;
    for (i = 0; i < nb_hw_breakpoint; i++) {
        dbg->arch.debugreg[i] = hw_breakpoint[i].addr;
        dbg->arch.debugreg[7] |=
            (2 << (i * 2)) |
            (type_code[hw_breakpoint[i].type] << (16 + i * 4)) |
            (len_code[hw_breakpoint[i].len] << (18 + i * 4));
    }
}
#endif /* KVM_CAP_SET_GUEST_DEBUG */