Statistics
| Branch: | Revision:

root / target-i386 / kvm.c @ a0fb002c

History | View | Annotate | Download (28.6 kB)

1
/*
2
 * QEMU KVM support
3
 *
4
 * Copyright (C) 2006-2008 Qumranet Technologies
5
 * Copyright IBM, Corp. 2008
6
 *
7
 * Authors:
8
 *  Anthony Liguori   <aliguori@us.ibm.com>
9
 *
10
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
11
 * See the COPYING file in the top-level directory.
12
 *
13
 */
14

    
15
#include <sys/types.h>
16
#include <sys/ioctl.h>
17
#include <sys/mman.h>
18

    
19
#include <linux/kvm.h>
20

    
21
#include "qemu-common.h"
22
#include "sysemu.h"
23
#include "kvm.h"
24
#include "cpu.h"
25
#include "gdbstub.h"
26
#include "host-utils.h"
27

    
28
//#define DEBUG_KVM
29

    
30
#ifdef DEBUG_KVM
31
#define dprintf(fmt, ...) \
32
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
33
#else
34
#define dprintf(fmt, ...) \
35
    do { } while (0)
36
#endif
37

    
38
#ifdef KVM_CAP_EXT_CPUID
39

    
40
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
41
{
42
    struct kvm_cpuid2 *cpuid;
43
    int r, size;
44

    
45
    size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
46
    cpuid = (struct kvm_cpuid2 *)qemu_mallocz(size);
47
    cpuid->nent = max;
48
    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
49
    if (r == 0 && cpuid->nent >= max) {
50
        r = -E2BIG;
51
    }
52
    if (r < 0) {
53
        if (r == -E2BIG) {
54
            qemu_free(cpuid);
55
            return NULL;
56
        } else {
57
            fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
58
                    strerror(-r));
59
            exit(1);
60
        }
61
    }
62
    return cpuid;
63
}
64

    
65
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
66
{
67
    struct kvm_cpuid2 *cpuid;
68
    int i, max;
69
    uint32_t ret = 0;
70
    uint32_t cpuid_1_edx;
71

    
72
    if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
73
        return -1U;
74
    }
75

    
76
    max = 1;
77
    while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
78
        max *= 2;
79
    }
80

    
81
    for (i = 0; i < cpuid->nent; ++i) {
82
        if (cpuid->entries[i].function == function) {
83
            switch (reg) {
84
            case R_EAX:
85
                ret = cpuid->entries[i].eax;
86
                break;
87
            case R_EBX:
88
                ret = cpuid->entries[i].ebx;
89
                break;
90
            case R_ECX:
91
                ret = cpuid->entries[i].ecx;
92
                break;
93
            case R_EDX:
94
                ret = cpuid->entries[i].edx;
95
                if (function == 0x80000001) {
96
                    /* On Intel, kvm returns cpuid according to the Intel spec,
97
                     * so add missing bits according to the AMD spec:
98
                     */
99
                    cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, R_EDX);
100
                    ret |= cpuid_1_edx & 0xdfeff7ff;
101
                }
102
                break;
103
            }
104
        }
105
    }
106

    
107
    qemu_free(cpuid);
108

    
109
    return ret;
110
}
111

    
112
#else
113

    
114
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
115
{
116
    return -1U;
117
}
118

    
119
#endif
120

    
121
static void kvm_trim_features(uint32_t *features, uint32_t supported)
122
{
123
    int i;
124
    uint32_t mask;
125

    
126
    for (i = 0; i < 32; ++i) {
127
        mask = 1U << i;
128
        if ((*features & mask) && !(supported & mask)) {
129
            *features &= ~mask;
130
        }
131
    }
132
}
133

    
134
int kvm_arch_init_vcpu(CPUState *env)
135
{
136
    struct {
137
        struct kvm_cpuid2 cpuid;
138
        struct kvm_cpuid_entry2 entries[100];
139
    } __attribute__((packed)) cpuid_data;
140
    uint32_t limit, i, j, cpuid_i;
141
    uint32_t unused;
142

    
143
    env->mp_state = KVM_MP_STATE_RUNNABLE;
144

    
145
    kvm_trim_features(&env->cpuid_features,
146
        kvm_arch_get_supported_cpuid(env, 1, R_EDX));
147

    
148
    i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
149
    kvm_trim_features(&env->cpuid_ext_features,
150
        kvm_arch_get_supported_cpuid(env, 1, R_ECX));
151
    env->cpuid_ext_features |= i;
152

    
153
    kvm_trim_features(&env->cpuid_ext2_features,
154
        kvm_arch_get_supported_cpuid(env, 0x80000001, R_EDX));
155
    kvm_trim_features(&env->cpuid_ext3_features,
156
        kvm_arch_get_supported_cpuid(env, 0x80000001, R_ECX));
157

    
158
    cpuid_i = 0;
159

    
160
    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
161

    
162
    for (i = 0; i <= limit; i++) {
163
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];
164

    
165
        switch (i) {
166
        case 2: {
167
            /* Keep reading function 2 till all the input is received */
168
            int times;
169

    
170
            c->function = i;
171
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
172
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
173
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
174
            times = c->eax & 0xff;
175

    
176
            for (j = 1; j < times; ++j) {
177
                c = &cpuid_data.entries[cpuid_i++];
178
                c->function = i;
179
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
180
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
181
            }
182
            break;
183
        }
184
        case 4:
185
        case 0xb:
186
        case 0xd:
187
            for (j = 0; ; j++) {
188
                c->function = i;
189
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
190
                c->index = j;
191
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
192

    
193
                if (i == 4 && c->eax == 0)
194
                    break;
195
                if (i == 0xb && !(c->ecx & 0xff00))
196
                    break;
197
                if (i == 0xd && c->eax == 0)
198
                    break;
199

    
200
                c = &cpuid_data.entries[cpuid_i++];
201
            }
202
            break;
203
        default:
204
            c->function = i;
205
            c->flags = 0;
206
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
207
            break;
208
        }
209
    }
210
    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
211

    
212
    for (i = 0x80000000; i <= limit; i++) {
213
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];
214

    
215
        c->function = i;
216
        c->flags = 0;
217
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
218
    }
219

    
220
    cpuid_data.cpuid.nent = cpuid_i;
221

    
222
    return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
223
}
224

    
225
void kvm_arch_reset_vcpu(CPUState *env)
226
{
227
    env->interrupt_injected = -1;
228
    env->nmi_injected = 0;
229
    env->nmi_pending = 0;
230
}
231

    
232
static int kvm_has_msr_star(CPUState *env)
233
{
234
    static int has_msr_star;
235
    int ret;
236

    
237
    /* first time */
238
    if (has_msr_star == 0) {        
239
        struct kvm_msr_list msr_list, *kvm_msr_list;
240

    
241
        has_msr_star = -1;
242

    
243
        /* Obtain MSR list from KVM.  These are the MSRs that we must
244
         * save/restore */
245
        msr_list.nmsrs = 0;
246
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
247
        if (ret < 0)
248
            return 0;
249

    
250
        /* Old kernel modules had a bug and could write beyond the provided
251
           memory. Allocate at least a safe amount of 1K. */
252
        kvm_msr_list = qemu_mallocz(MAX(1024, sizeof(msr_list) +
253
                                              msr_list.nmsrs *
254
                                              sizeof(msr_list.indices[0])));
255

    
256
        kvm_msr_list->nmsrs = msr_list.nmsrs;
257
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
258
        if (ret >= 0) {
259
            int i;
260

    
261
            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
262
                if (kvm_msr_list->indices[i] == MSR_STAR) {
263
                    has_msr_star = 1;
264
                    break;
265
                }
266
            }
267
        }
268

    
269
        free(kvm_msr_list);
270
    }
271

    
272
    if (has_msr_star == 1)
273
        return 1;
274
    return 0;
275
}
276

    
277
int kvm_arch_init(KVMState *s, int smp_cpus)
278
{
279
    int ret;
280

    
281
    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
282
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
283
     * must be part of guest physical memory, we need to allocate it.  Older
284
     * versions of KVM just assumed that it would be at the end of physical
285
     * memory but that doesn't work with more than 4GB of memory.  We simply
286
     * refuse to work with those older versions of KVM. */
287
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
288
    if (ret <= 0) {
289
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
290
        return ret;
291
    }
292

    
293
    /* this address is 3 pages before the bios, and the bios should present
294
     * as unavaible memory.  FIXME, need to ensure the e820 map deals with
295
     * this?
296
     */
297
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
298
}
299
                    
300
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
301
{
302
    lhs->selector = rhs->selector;
303
    lhs->base = rhs->base;
304
    lhs->limit = rhs->limit;
305
    lhs->type = 3;
306
    lhs->present = 1;
307
    lhs->dpl = 3;
308
    lhs->db = 0;
309
    lhs->s = 1;
310
    lhs->l = 0;
311
    lhs->g = 0;
312
    lhs->avl = 0;
313
    lhs->unusable = 0;
314
}
315

    
316
static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
317
{
318
    unsigned flags = rhs->flags;
319
    lhs->selector = rhs->selector;
320
    lhs->base = rhs->base;
321
    lhs->limit = rhs->limit;
322
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
323
    lhs->present = (flags & DESC_P_MASK) != 0;
324
    lhs->dpl = rhs->selector & 3;
325
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
326
    lhs->s = (flags & DESC_S_MASK) != 0;
327
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
328
    lhs->g = (flags & DESC_G_MASK) != 0;
329
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
330
    lhs->unusable = 0;
331
}
332

    
333
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
334
{
335
    lhs->selector = rhs->selector;
336
    lhs->base = rhs->base;
337
    lhs->limit = rhs->limit;
338
    lhs->flags =
339
        (rhs->type << DESC_TYPE_SHIFT)
340
        | (rhs->present * DESC_P_MASK)
341
        | (rhs->dpl << DESC_DPL_SHIFT)
342
        | (rhs->db << DESC_B_SHIFT)
343
        | (rhs->s * DESC_S_MASK)
344
        | (rhs->l << DESC_L_SHIFT)
345
        | (rhs->g * DESC_G_MASK)
346
        | (rhs->avl * DESC_AVL_MASK);
347
}
348

    
349
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
350
{
351
    if (set)
352
        *kvm_reg = *qemu_reg;
353
    else
354
        *qemu_reg = *kvm_reg;
355
}
356

    
357
static int kvm_getput_regs(CPUState *env, int set)
358
{
359
    struct kvm_regs regs;
360
    int ret = 0;
361

    
362
    if (!set) {
363
        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
364
        if (ret < 0)
365
            return ret;
366
    }
367

    
368
    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
369
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
370
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
371
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
372
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
373
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
374
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
375
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
376
#ifdef TARGET_X86_64
377
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
378
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
379
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
380
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
381
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
382
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
383
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
384
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
385
#endif
386

    
387
    kvm_getput_reg(&regs.rflags, &env->eflags, set);
388
    kvm_getput_reg(&regs.rip, &env->eip, set);
389

    
390
    if (set)
391
        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
392

    
393
    return ret;
394
}
395

    
396
static int kvm_put_fpu(CPUState *env)
397
{
398
    struct kvm_fpu fpu;
399
    int i;
400

    
401
    memset(&fpu, 0, sizeof fpu);
402
    fpu.fsw = env->fpus & ~(7 << 11);
403
    fpu.fsw |= (env->fpstt & 7) << 11;
404
    fpu.fcw = env->fpuc;
405
    for (i = 0; i < 8; ++i)
406
        fpu.ftwx |= (!env->fptags[i]) << i;
407
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
408
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
409
    fpu.mxcsr = env->mxcsr;
410

    
411
    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
412
}
413

    
414
static int kvm_put_sregs(CPUState *env)
415
{
416
    struct kvm_sregs sregs;
417

    
418
    memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
419
    if (env->interrupt_injected >= 0) {
420
        sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
421
                (uint64_t)1 << (env->interrupt_injected % 64);
422
    }
423

    
424
    if ((env->eflags & VM_MASK)) {
425
            set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
426
            set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
427
            set_v8086_seg(&sregs.es, &env->segs[R_ES]);
428
            set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
429
            set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
430
            set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
431
    } else {
432
            set_seg(&sregs.cs, &env->segs[R_CS]);
433
            set_seg(&sregs.ds, &env->segs[R_DS]);
434
            set_seg(&sregs.es, &env->segs[R_ES]);
435
            set_seg(&sregs.fs, &env->segs[R_FS]);
436
            set_seg(&sregs.gs, &env->segs[R_GS]);
437
            set_seg(&sregs.ss, &env->segs[R_SS]);
438

    
439
            if (env->cr[0] & CR0_PE_MASK) {
440
                /* force ss cpl to cs cpl */
441
                sregs.ss.selector = (sregs.ss.selector & ~3) |
442
                        (sregs.cs.selector & 3);
443
                sregs.ss.dpl = sregs.ss.selector & 3;
444
            }
445
    }
446

    
447
    set_seg(&sregs.tr, &env->tr);
448
    set_seg(&sregs.ldt, &env->ldt);
449

    
450
    sregs.idt.limit = env->idt.limit;
451
    sregs.idt.base = env->idt.base;
452
    sregs.gdt.limit = env->gdt.limit;
453
    sregs.gdt.base = env->gdt.base;
454

    
455
    sregs.cr0 = env->cr[0];
456
    sregs.cr2 = env->cr[2];
457
    sregs.cr3 = env->cr[3];
458
    sregs.cr4 = env->cr[4];
459

    
460
    sregs.cr8 = cpu_get_apic_tpr(env);
461
    sregs.apic_base = cpu_get_apic_base(env);
462

    
463
    sregs.efer = env->efer;
464

    
465
    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
466
}
467

    
468
static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
469
                              uint32_t index, uint64_t value)
470
{
471
    entry->index = index;
472
    entry->data = value;
473
}
474

    
475
static int kvm_put_msrs(CPUState *env)
476
{
477
    struct {
478
        struct kvm_msrs info;
479
        struct kvm_msr_entry entries[100];
480
    } msr_data;
481
    struct kvm_msr_entry *msrs = msr_data.entries;
482
    int n = 0;
483

    
484
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
485
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
486
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
487
    if (kvm_has_msr_star(env))
488
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
489
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
490
#ifdef TARGET_X86_64
491
    /* FIXME if lm capable */
492
    kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
493
    kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
494
    kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
495
    kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
496
#endif
497
    msr_data.info.nmsrs = n;
498

    
499
    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
500

    
501
}
502

    
503

    
504
static int kvm_get_fpu(CPUState *env)
505
{
506
    struct kvm_fpu fpu;
507
    int i, ret;
508

    
509
    ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);
510
    if (ret < 0)
511
        return ret;
512

    
513
    env->fpstt = (fpu.fsw >> 11) & 7;
514
    env->fpus = fpu.fsw;
515
    env->fpuc = fpu.fcw;
516
    for (i = 0; i < 8; ++i)
517
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
518
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
519
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
520
    env->mxcsr = fpu.mxcsr;
521

    
522
    return 0;
523
}
524

    
525
static int kvm_get_sregs(CPUState *env)
526
{
527
    struct kvm_sregs sregs;
528
    uint32_t hflags;
529
    int bit, i, ret;
530

    
531
    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
532
    if (ret < 0)
533
        return ret;
534

    
535
    /* There can only be one pending IRQ set in the bitmap at a time, so try
536
       to find it and save its number instead (-1 for none). */
537
    env->interrupt_injected = -1;
538
    for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
539
        if (sregs.interrupt_bitmap[i]) {
540
            bit = ctz64(sregs.interrupt_bitmap[i]);
541
            env->interrupt_injected = i * 64 + bit;
542
            break;
543
        }
544
    }
545

    
546
    get_seg(&env->segs[R_CS], &sregs.cs);
547
    get_seg(&env->segs[R_DS], &sregs.ds);
548
    get_seg(&env->segs[R_ES], &sregs.es);
549
    get_seg(&env->segs[R_FS], &sregs.fs);
550
    get_seg(&env->segs[R_GS], &sregs.gs);
551
    get_seg(&env->segs[R_SS], &sregs.ss);
552

    
553
    get_seg(&env->tr, &sregs.tr);
554
    get_seg(&env->ldt, &sregs.ldt);
555

    
556
    env->idt.limit = sregs.idt.limit;
557
    env->idt.base = sregs.idt.base;
558
    env->gdt.limit = sregs.gdt.limit;
559
    env->gdt.base = sregs.gdt.base;
560

    
561
    env->cr[0] = sregs.cr0;
562
    env->cr[2] = sregs.cr2;
563
    env->cr[3] = sregs.cr3;
564
    env->cr[4] = sregs.cr4;
565

    
566
    cpu_set_apic_base(env, sregs.apic_base);
567

    
568
    env->efer = sregs.efer;
569
    //cpu_set_apic_tpr(env, sregs.cr8);
570

    
571
#define HFLAG_COPY_MASK ~( \
572
                        HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
573
                        HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
574
                        HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
575
                        HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
576

    
577

    
578

    
579
    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
580
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
581
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
582
            (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
583
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
584
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
585
            (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
586

    
587
    if (env->efer & MSR_EFER_LMA) {
588
        hflags |= HF_LMA_MASK;
589
    }
590

    
591
    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
592
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
593
    } else {
594
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
595
                (DESC_B_SHIFT - HF_CS32_SHIFT);
596
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
597
                (DESC_B_SHIFT - HF_SS32_SHIFT);
598
        if (!(env->cr[0] & CR0_PE_MASK) ||
599
                   (env->eflags & VM_MASK) ||
600
                   !(hflags & HF_CS32_MASK)) {
601
                hflags |= HF_ADDSEG_MASK;
602
            } else {
603
                hflags |= ((env->segs[R_DS].base |
604
                                env->segs[R_ES].base |
605
                                env->segs[R_SS].base) != 0) <<
606
                    HF_ADDSEG_SHIFT;
607
            }
608
    }
609
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
610

    
611
    return 0;
612
}
613

    
614
static int kvm_get_msrs(CPUState *env)
615
{
616
    struct {
617
        struct kvm_msrs info;
618
        struct kvm_msr_entry entries[100];
619
    } msr_data;
620
    struct kvm_msr_entry *msrs = msr_data.entries;
621
    int ret, i, n;
622

    
623
    n = 0;
624
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
625
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
626
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
627
    if (kvm_has_msr_star(env))
628
        msrs[n++].index = MSR_STAR;
629
    msrs[n++].index = MSR_IA32_TSC;
630
#ifdef TARGET_X86_64
631
    /* FIXME lm_capable_kernel */
632
    msrs[n++].index = MSR_CSTAR;
633
    msrs[n++].index = MSR_KERNELGSBASE;
634
    msrs[n++].index = MSR_FMASK;
635
    msrs[n++].index = MSR_LSTAR;
636
#endif
637
    msr_data.info.nmsrs = n;
638
    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
639
    if (ret < 0)
640
        return ret;
641

    
642
    for (i = 0; i < ret; i++) {
643
        switch (msrs[i].index) {
644
        case MSR_IA32_SYSENTER_CS:
645
            env->sysenter_cs = msrs[i].data;
646
            break;
647
        case MSR_IA32_SYSENTER_ESP:
648
            env->sysenter_esp = msrs[i].data;
649
            break;
650
        case MSR_IA32_SYSENTER_EIP:
651
            env->sysenter_eip = msrs[i].data;
652
            break;
653
        case MSR_STAR:
654
            env->star = msrs[i].data;
655
            break;
656
#ifdef TARGET_X86_64
657
        case MSR_CSTAR:
658
            env->cstar = msrs[i].data;
659
            break;
660
        case MSR_KERNELGSBASE:
661
            env->kernelgsbase = msrs[i].data;
662
            break;
663
        case MSR_FMASK:
664
            env->fmask = msrs[i].data;
665
            break;
666
        case MSR_LSTAR:
667
            env->lstar = msrs[i].data;
668
            break;
669
#endif
670
        case MSR_IA32_TSC:
671
            env->tsc = msrs[i].data;
672
            break;
673
        }
674
    }
675

    
676
    return 0;
677
}
678

    
679
static int kvm_put_mp_state(CPUState *env)
680
{
681
    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
682

    
683
    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
684
}
685

    
686
static int kvm_get_mp_state(CPUState *env)
687
{
688
    struct kvm_mp_state mp_state;
689
    int ret;
690

    
691
    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
692
    if (ret < 0) {
693
        return ret;
694
    }
695
    env->mp_state = mp_state.mp_state;
696
    return 0;
697
}
698

    
699
static int kvm_put_vcpu_events(CPUState *env)
700
{
701
#ifdef KVM_CAP_VCPU_EVENTS
702
    struct kvm_vcpu_events events;
703

    
704
    if (!kvm_has_vcpu_events()) {
705
        return 0;
706
    }
707

    
708
    events.exception.injected = (env->exception_index >= 0);
709
    events.exception.nr = env->exception_index;
710
    events.exception.has_error_code = env->has_error_code;
711
    events.exception.error_code = env->error_code;
712

    
713
    events.interrupt.injected = (env->interrupt_injected >= 0);
714
    events.interrupt.nr = env->interrupt_injected;
715
    events.interrupt.soft = env->soft_interrupt;
716

    
717
    events.nmi.injected = env->nmi_injected;
718
    events.nmi.pending = env->nmi_pending;
719
    events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
720

    
721
    events.sipi_vector = env->sipi_vector;
722

    
723
    return kvm_vcpu_ioctl(env, KVM_SET_VCPU_EVENTS, &events);
724
#else
725
    return 0;
726
#endif
727
}
728

    
729
static int kvm_get_vcpu_events(CPUState *env)
730
{
731
#ifdef KVM_CAP_VCPU_EVENTS
732
    struct kvm_vcpu_events events;
733
    int ret;
734

    
735
    if (!kvm_has_vcpu_events()) {
736
        return 0;
737
    }
738

    
739
    ret = kvm_vcpu_ioctl(env, KVM_GET_VCPU_EVENTS, &events);
740
    if (ret < 0) {
741
       return ret;
742
    }
743
    env->exception_index =
744
       events.exception.injected ? events.exception.nr : -1;
745
    env->has_error_code = events.exception.has_error_code;
746
    env->error_code = events.exception.error_code;
747

    
748
    env->interrupt_injected =
749
        events.interrupt.injected ? events.interrupt.nr : -1;
750
    env->soft_interrupt = events.interrupt.soft;
751

    
752
    env->nmi_injected = events.nmi.injected;
753
    env->nmi_pending = events.nmi.pending;
754
    if (events.nmi.masked) {
755
        env->hflags2 |= HF2_NMI_MASK;
756
    } else {
757
        env->hflags2 &= ~HF2_NMI_MASK;
758
    }
759

    
760
    env->sipi_vector = events.sipi_vector;
761
#endif
762

    
763
    return 0;
764
}
765

    
766
int kvm_arch_put_registers(CPUState *env)
767
{
768
    int ret;
769

    
770
    ret = kvm_getput_regs(env, 1);
771
    if (ret < 0)
772
        return ret;
773

    
774
    ret = kvm_put_fpu(env);
775
    if (ret < 0)
776
        return ret;
777

    
778
    ret = kvm_put_sregs(env);
779
    if (ret < 0)
780
        return ret;
781

    
782
    ret = kvm_put_msrs(env);
783
    if (ret < 0)
784
        return ret;
785

    
786
    ret = kvm_put_mp_state(env);
787
    if (ret < 0)
788
        return ret;
789

    
790
    ret = kvm_put_vcpu_events(env);
791
    if (ret < 0)
792
        return ret;
793

    
794
    return 0;
795
}
796

    
797
int kvm_arch_get_registers(CPUState *env)
798
{
799
    int ret;
800

    
801
    ret = kvm_getput_regs(env, 0);
802
    if (ret < 0)
803
        return ret;
804

    
805
    ret = kvm_get_fpu(env);
806
    if (ret < 0)
807
        return ret;
808

    
809
    ret = kvm_get_sregs(env);
810
    if (ret < 0)
811
        return ret;
812

    
813
    ret = kvm_get_msrs(env);
814
    if (ret < 0)
815
        return ret;
816

    
817
    ret = kvm_get_mp_state(env);
818
    if (ret < 0)
819
        return ret;
820

    
821
    ret = kvm_get_vcpu_events(env);
822
    if (ret < 0)
823
        return ret;
824

    
825
    return 0;
826
}
827

    
828
int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
829
{
830
    /* Try to inject an interrupt if the guest can accept it */
831
    if (run->ready_for_interrupt_injection &&
832
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
833
        (env->eflags & IF_MASK)) {
834
        int irq;
835

    
836
        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
837
        irq = cpu_get_pic_interrupt(env);
838
        if (irq >= 0) {
839
            struct kvm_interrupt intr;
840
            intr.irq = irq;
841
            /* FIXME: errors */
842
            dprintf("injected interrupt %d\n", irq);
843
            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
844
        }
845
    }
846

    
847
    /* If we have an interrupt but the guest is not ready to receive an
848
     * interrupt, request an interrupt window exit.  This will
849
     * cause a return to userspace as soon as the guest is ready to
850
     * receive interrupts. */
851
    if ((env->interrupt_request & CPU_INTERRUPT_HARD))
852
        run->request_interrupt_window = 1;
853
    else
854
        run->request_interrupt_window = 0;
855

    
856
    dprintf("setting tpr\n");
857
    run->cr8 = cpu_get_apic_tpr(env);
858

    
859
    return 0;
860
}
861

    
862
int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
863
{
864
    if (run->if_flag)
865
        env->eflags |= IF_MASK;
866
    else
867
        env->eflags &= ~IF_MASK;
868
    
869
    cpu_set_apic_tpr(env, run->cr8);
870
    cpu_set_apic_base(env, run->apic_base);
871

    
872
    return 0;
873
}
874

    
875
static int kvm_handle_halt(CPUState *env)
876
{
877
    if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
878
          (env->eflags & IF_MASK)) &&
879
        !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
880
        env->halted = 1;
881
        env->exception_index = EXCP_HLT;
882
        return 0;
883
    }
884

    
885
    return 1;
886
}
887

    
888
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
889
{
890
    int ret = 0;
891

    
892
    switch (run->exit_reason) {
893
    case KVM_EXIT_HLT:
894
        dprintf("handle_hlt\n");
895
        ret = kvm_handle_halt(env);
896
        break;
897
    }
898

    
899
    return ret;
900
}
901

    
902
#ifdef KVM_CAP_SET_GUEST_DEBUG
903
int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
904
{
905
    static const uint8_t int3 = 0xcc;
906

    
907
    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
908
        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1))
909
        return -EINVAL;
910
    return 0;
911
}
912

    
913
int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
914
{
915
    uint8_t int3;
916

    
917
    if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
918
        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
919
        return -EINVAL;
920
    return 0;
921
}
922

    
923
static struct {
924
    target_ulong addr;
925
    int len;
926
    int type;
927
} hw_breakpoint[4];
928

    
929
static int nb_hw_breakpoint;
930

    
931
static int find_hw_breakpoint(target_ulong addr, int len, int type)
932
{
933
    int n;
934

    
935
    for (n = 0; n < nb_hw_breakpoint; n++)
936
        if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
937
            (hw_breakpoint[n].len == len || len == -1))
938
            return n;
939
    return -1;
940
}
941

    
942
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
943
                                  target_ulong len, int type)
944
{
945
    switch (type) {
946
    case GDB_BREAKPOINT_HW:
947
        len = 1;
948
        break;
949
    case GDB_WATCHPOINT_WRITE:
950
    case GDB_WATCHPOINT_ACCESS:
951
        switch (len) {
952
        case 1:
953
            break;
954
        case 2:
955
        case 4:
956
        case 8:
957
            if (addr & (len - 1))
958
                return -EINVAL;
959
            break;
960
        default:
961
            return -EINVAL;
962
        }
963
        break;
964
    default:
965
        return -ENOSYS;
966
    }
967

    
968
    if (nb_hw_breakpoint == 4)
969
        return -ENOBUFS;
970

    
971
    if (find_hw_breakpoint(addr, len, type) >= 0)
972
        return -EEXIST;
973

    
974
    hw_breakpoint[nb_hw_breakpoint].addr = addr;
975
    hw_breakpoint[nb_hw_breakpoint].len = len;
976
    hw_breakpoint[nb_hw_breakpoint].type = type;
977
    nb_hw_breakpoint++;
978

    
979
    return 0;
980
}
981

    
982
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
983
                                  target_ulong len, int type)
984
{
985
    int n;
986

    
987
    n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
988
    if (n < 0)
989
        return -ENOENT;
990

    
991
    nb_hw_breakpoint--;
992
    hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];
993

    
994
    return 0;
995
}
996

    
997
void kvm_arch_remove_all_hw_breakpoints(void)
998
{
999
    nb_hw_breakpoint = 0;
1000
}
1001

    
1002
static CPUWatchpoint hw_watchpoint;
1003

    
1004
int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
1005
{
1006
    int handle = 0;
1007
    int n;
1008

    
1009
    if (arch_info->exception == 1) {
1010
        if (arch_info->dr6 & (1 << 14)) {
1011
            if (cpu_single_env->singlestep_enabled)
1012
                handle = 1;
1013
        } else {
1014
            for (n = 0; n < 4; n++)
1015
                if (arch_info->dr6 & (1 << n))
1016
                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
1017
                    case 0x0:
1018
                        handle = 1;
1019
                        break;
1020
                    case 0x1:
1021
                        handle = 1;
1022
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
1023
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
1024
                        hw_watchpoint.flags = BP_MEM_WRITE;
1025
                        break;
1026
                    case 0x3:
1027
                        handle = 1;
1028
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
1029
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
1030
                        hw_watchpoint.flags = BP_MEM_ACCESS;
1031
                        break;
1032
                    }
1033
        }
1034
    } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc))
1035
        handle = 1;
1036

    
1037
    if (!handle)
1038
        kvm_update_guest_debug(cpu_single_env,
1039
                        (arch_info->exception == 1) ?
1040
                        KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);
1041

    
1042
    return handle;
1043
}
1044

    
1045
void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
1046
{
1047
    const uint8_t type_code[] = {
1048
        [GDB_BREAKPOINT_HW] = 0x0,
1049
        [GDB_WATCHPOINT_WRITE] = 0x1,
1050
        [GDB_WATCHPOINT_ACCESS] = 0x3
1051
    };
1052
    const uint8_t len_code[] = {
1053
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
1054
    };
1055
    int n;
1056

    
1057
    if (kvm_sw_breakpoints_active(env))
1058
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1059

    
1060
    if (nb_hw_breakpoint > 0) {
1061
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1062
        dbg->arch.debugreg[7] = 0x0600;
1063
        for (n = 0; n < nb_hw_breakpoint; n++) {
1064
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
1065
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
1066
                (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
1067
                (len_code[hw_breakpoint[n].len] << (18 + n*4));
1068
        }
1069
    }
1070
}
1071
#endif /* KVM_CAP_SET_GUEST_DEBUG */