target-i386/kvm.c @ 0e607a80
/*
 * QEMU KVM support
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "kvm.h"
#include "cpu.h"
#include "gdbstub.h"
#include "host-utils.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#ifdef KVM_CAP_EXT_CPUID
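
/* Query KVM_GET_SUPPORTED_CPUID with room for 'max' entries.  Returns NULL
 * when the buffer was too small (-E2BIG) so the caller can retry with a
 * larger one; any other error is fatal. */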
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *cpuid;
    int r, size;

    size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
    cpuid = (struct kvm_cpuid2 *)qemu_mallocz(size);
    cpuid->nent = max;
    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
    if (r == 0 && cpuid->nent >= max) {
        r = -E2BIG;
    }
    if (r < 0) {
        if (r == -E2BIG) {
            qemu_free(cpuid);
            return NULL;
        } else {
            fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                    strerror(-r));
            exit(1);
        }
    }
    return cpuid;
}
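
/* Return the bits of the given CPUID function/register that the host KVM
 * reports as supported, or -1U if KVM_CAP_EXT_CPUID is unavailable. */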
uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
{
    struct kvm_cpuid2 *cpuid;
    int i, max;
    uint32_t ret = 0;
    uint32_t cpuid_1_edx;

    if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) {
        return -1U;
    }

    max = 1;
    while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) {
        max *= 2;
    }

    for (i = 0; i < cpuid->nent; ++i) {
        if (cpuid->entries[i].function == function) {
            switch (reg) {
            case R_EAX:
                ret = cpuid->entries[i].eax;
                break;
            case R_EBX:
                ret = cpuid->entries[i].ebx;
                break;
            case R_ECX:
                ret = cpuid->entries[i].ecx;
                break;
            case R_EDX:
                ret = cpuid->entries[i].edx;
                if (function == 0x80000001) {
                    /* On Intel, kvm returns cpuid according to the Intel spec,
                     * so add missing bits according to the AMD spec:
                     */
                    cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, R_EDX);
                    ret |= cpuid_1_edx & 0xdfeff7ff;
                }
                break;
            }
        }
    }

    qemu_free(cpuid);

    return ret;
}

#else

uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg)
{
    return -1U;
}

#endif
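
/* Clear every feature bit in *features that is not present in 'supported'. */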
static void kvm_trim_features(uint32_t *features, uint32_t supported)
{
    int i;
    uint32_t mask;

    for (i = 0; i < 32; ++i) {
        mask = 1U << i;
        if ((*features & mask) && !(supported & mask)) {
            *features &= ~mask;
        }
    }
}
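
/* Build the CPUID table for this vcpu, with the feature words trimmed to what
 * the host KVM supports, and install it via KVM_SET_CPUID2. */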
int kvm_arch_init_vcpu(CPUState *env)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[100];
    } __attribute__((packed)) cpuid_data;
    uint32_t limit, i, j, cpuid_i;
    uint32_t unused;

    env->mp_state = KVM_MP_STATE_RUNNABLE;

    kvm_trim_features(&env->cpuid_features,
        kvm_arch_get_supported_cpuid(env, 1, R_EDX));

    i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
    kvm_trim_features(&env->cpuid_ext_features,
        kvm_arch_get_supported_cpuid(env, 1, R_ECX));
    env->cpuid_ext_features |= i;

    kvm_trim_features(&env->cpuid_ext2_features,
        kvm_arch_get_supported_cpuid(env, 0x80000001, R_EDX));
    kvm_trim_features(&env->cpuid_ext3_features,
        kvm_arch_get_supported_cpuid(env, 0x80000001, R_ECX));

    cpuid_i = 0;

    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];

        switch (i) {
        case 2: {
            /* Keep reading function 2 till all the input is received */
            int times;

            c->function = i;
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            times = c->eax & 0xff;

            for (j = 1; j < times; ++j) {
                c = &cpuid_data.entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 4:
        case 0xb:
        case 0xd:
            for (j = 0; ; j++) {
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0)
                    break;
                if (i == 0xb && !(c->ecx & 0xff00))
                    break;
                if (i == 0xd && c->eax == 0)
                    break;

                c = &cpuid_data.entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            break;
        }
    }
    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];

        c->function = i;
        c->flags = 0;
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
    }

    cpuid_data.cpuid.nent = cpuid_i;

    return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
}

void kvm_arch_reset_vcpu(CPUState *env)
{
    env->interrupt_injected = -1;
}
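
/* Check whether the kernel's MSR index list contains MSR_STAR; the result is
 * cached in a static variable after the first call. */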
static int kvm_has_msr_star(CPUState *env)
{
    static int has_msr_star;
    int ret;

    /* first time */
    if (has_msr_star == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        has_msr_star = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0)
            return 0;

        /* Old kernel modules had a bug and could write beyond the provided
           memory. Allocate at least a safe amount of 1K. */
        kvm_msr_list = qemu_mallocz(MAX(1024, sizeof(msr_list) +
                                              msr_list.nmsrs *
                                              sizeof(msr_list.indices[0])));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            int i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                }
            }
        }

        free(kvm_msr_list);
    }

    if (has_msr_star == 1)
        return 1;
    return 0;
}

int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
     * versions of KVM just assumed that it would be at the end of physical
     * memory but that doesn't work with more than 4GB of memory.  We simply
     * refuse to work with those older versions of KVM. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
    if (ret <= 0) {
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
        return ret;
    }

    /* this address is 3 pages before the bios, and the bios should present
     * it as unavailable memory.  FIXME, need to ensure the e820 map deals with
     * this?
     */
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
}
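
/* Fill a kvm_segment with the fixed attributes used for virtual-8086 mode
 * segments; only selector, base and limit come from the QEMU state. */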
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->db = 0;
    lhs->s = 1;
    lhs->l = 0;
    lhs->g = 0;
    lhs->avl = 0;
    lhs->unusable = 0;
}
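
/* Convert a QEMU SegmentCache into KVM's kvm_segment representation. */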
static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
    lhs->s = (flags & DESC_S_MASK) != 0;
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
    lhs->g = (flags & DESC_G_MASK) != 0;
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
    lhs->unusable = 0;
}
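
/* Convert a kvm_segment back into QEMU's SegmentCache flags encoding. */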
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags =
        (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->db << DESC_B_SHIFT)
        | (rhs->s * DESC_S_MASK)
        | (rhs->l << DESC_L_SHIFT)
        | (rhs->g * DESC_G_MASK)
        | (rhs->avl * DESC_AVL_MASK);
}

static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (set)
        *kvm_reg = *qemu_reg;
    else
        *qemu_reg = *kvm_reg;
}
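
/* Copy the general purpose registers, rflags and rip between QEMU and KVM;
 * 'set' selects the direction (non-zero means QEMU state -> KVM). */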
static int kvm_getput_regs(CPUState *env, int set)
{
    struct kvm_regs regs;
    int ret = 0;

    if (!set) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
        if (ret < 0)
            return ret;
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set)
        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);

    return ret;
}

static int kvm_put_fpu(CPUState *env)
{
    struct kvm_fpu fpu;
    int i;

    memset(&fpu, 0, sizeof fpu);
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;
    for (i = 0; i < 8; ++i)
        fpu.ftwx |= (!env->fptags[i]) << i;
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
    fpu.mxcsr = env->mxcsr;

    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
}

static int kvm_put_sregs(CPUState *env)
{
    struct kvm_sregs sregs;

    memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
    if (env->interrupt_injected >= 0) {
        sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
                (uint64_t)1 << (env->interrupt_injected % 64);
    }

    if ((env->eflags & VM_MASK)) {
            set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
            set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
            set_v8086_seg(&sregs.es, &env->segs[R_ES]);
            set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
            set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
            set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
            set_seg(&sregs.cs, &env->segs[R_CS]);
            set_seg(&sregs.ds, &env->segs[R_DS]);
            set_seg(&sregs.es, &env->segs[R_ES]);
            set_seg(&sregs.fs, &env->segs[R_FS]);
            set_seg(&sregs.gs, &env->segs[R_GS]);
            set_seg(&sregs.ss, &env->segs[R_SS]);

            if (env->cr[0] & CR0_PE_MASK) {
                /* force ss cpl to cs cpl */
                sregs.ss.selector = (sregs.ss.selector & ~3) |
                        (sregs.cs.selector & 3);
                sregs.ss.dpl = sregs.ss.selector & 3;
            }
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
}

static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
                              uint32_t index, uint64_t value)
{
    entry->index = index;
    entry->data = value;
}

static int kvm_put_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int n = 0;

    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star(env))
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
    kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
#endif
    msr_data.info.nmsrs = n;

    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
}

static int kvm_get_fpu(CPUState *env)
{
    struct kvm_fpu fpu;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);
    if (ret < 0)
        return ret;

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i)
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
    env->mxcsr = fpu.mxcsr;

    return 0;
}

static int kvm_get_sregs(CPUState *env)
{
    struct kvm_sregs sregs;
    uint32_t hflags;
    int bit, i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret < 0)
        return ret;

    /* There can only be one pending IRQ set in the bitmap at a time, so try
       to find it and save its number instead (-1 for none). */
    env->interrupt_injected = -1;
    for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
        if (sregs.interrupt_bitmap[i]) {
            bit = ctz64(sregs.interrupt_bitmap[i]);
            env->interrupt_injected = i * 64 + bit;
            break;
        }
    }

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
    //cpu_set_apic_tpr(env, sregs.cr8);
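
/* Recompute the hflags bits that are derived from the segment and control
 * registers just read from KVM; all other hflags bits are preserved via
 * HFLAG_COPY_MASK. */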
#define HFLAG_COPY_MASK ~( \
                        HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
                        HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
                        HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
                        HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
            (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
            (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(hflags & HF_CS32_MASK)) {
                hflags |= HF_ADDSEG_MASK;
            } else {
                hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
    }
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;

    return 0;
}

static int kvm_get_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env))
        msrs[n++].index = MSR_STAR;
    msrs[n++].index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    msrs[n++].index = MSR_CSTAR;
    msrs[n++].index = MSR_KERNELGSBASE;
    msrs[n++].index = MSR_FMASK;
    msrs[n++].index = MSR_LSTAR;
#endif
    msr_data.info.nmsrs = n;
    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
    if (ret < 0)
        return ret;

    for (i = 0; i < ret; i++) {
        switch (msrs[i].index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].data;
            break;
        case MSR_STAR:
            env->star = msrs[i].data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = msrs[i].data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].data;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].data;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].data;
            break;
#endif
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        }
    }

    return 0;
}

static int kvm_put_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };

    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
}

static int kvm_get_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    return 0;
}

int kvm_arch_put_registers(CPUState *env)
{
    int ret;

    ret = kvm_getput_regs(env, 1);
    if (ret < 0)
        return ret;

    ret = kvm_put_fpu(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_sregs(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_msrs(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_mp_state(env);
    if (ret < 0)
        return ret;

    ret = kvm_get_mp_state(env);
    if (ret < 0)
        return ret;

    return 0;
}

int kvm_arch_get_registers(CPUState *env)
{
    int ret;

    ret = kvm_getput_regs(env, 0);
    if (ret < 0)
        return ret;

    ret = kvm_get_fpu(env);
    if (ret < 0)
        return ret;

    ret = kvm_get_sregs(env);
    if (ret < 0)
        return ret;

    ret = kvm_get_msrs(env);
    if (ret < 0)
        return ret;

    return 0;
}
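
/* Called before entering the guest: inject a pending external interrupt if
 * the guest can take it now, otherwise ask KVM for an interrupt window exit,
 * and mirror the current TPR into cr8. */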
int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
    /* Try to inject an interrupt if the guest can accept it */
    if (run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        int irq;

        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            struct kvm_interrupt intr;
            intr.irq = irq;
            /* FIXME: errors */
            dprintf("injected interrupt %d\n", irq);
            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((env->interrupt_request & CPU_INTERRUPT_HARD))
        run->request_interrupt_window = 1;
    else
        run->request_interrupt_window = 0;

    dprintf("setting tpr\n");
    run->cr8 = cpu_get_apic_tpr(env);

    return 0;
}
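
/* Called after KVM_RUN returns: propagate the guest's interrupt flag, TPR
 * (cr8) and APIC base from kvm_run back into the CPU state. */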
int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
    if (run->if_flag)
        env->eflags |= IF_MASK;
    else
        env->eflags &= ~IF_MASK;

    cpu_set_apic_tpr(env, run->cr8);
    cpu_set_apic_base(env, run->apic_base);

    return 0;
}
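
/* HLT exit: halt the vcpu unless an unmasked interrupt or an NMI is already
 * pending, in which case execution should simply continue. */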
static int kvm_handle_halt(CPUState *env)
{
    if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
        return 0;
    }

    return 1;
}

int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    int ret = 0;

    switch (run->exit_reason) {
    case KVM_EXIT_HLT:
        dprintf("handle_hlt\n");
        ret = kvm_handle_halt(env);
        break;
    }

    return ret;
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1))
        return -EINVAL;
    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
{
    uint8_t int3;

    if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
        return -EINVAL;
    return 0;
}

static struct {
    target_ulong addr;
    int len;
    int type;
} hw_breakpoint[4];

static int nb_hw_breakpoint;

static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int n;

    for (n = 0; n < nb_hw_breakpoint; n++)
        if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
            (hw_breakpoint[n].len == len || len == -1))
            return n;
    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        len = 1;
        break;
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        switch (len) {
        case 1:
            break;
        case 2:
        case 4:
        case 8:
            if (addr & (len - 1))
                return -EINVAL;
            break;
        default:
            return -EINVAL;
        }
        break;
    default:
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4)
        return -ENOBUFS;

    if (find_hw_breakpoint(addr, len, type) >= 0)
        return -EEXIST;

    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].type = type;
    nb_hw_breakpoint++;

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
    if (n < 0)
        return -ENOENT;

    nb_hw_breakpoint--;
    hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = 0;
}

static CPUWatchpoint hw_watchpoint;
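
/* Debug exit handler: decode DR6/DR7 to decide whether the #DB/#BP was caused
 * by one of our breakpoints or watchpoints (handled by the debugger) or must
 * be re-injected into the guest. */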
int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
{
    int handle = 0;
    int n;

    if (arch_info->exception == 1) {
        if (arch_info->dr6 & (1 << 14)) {
            if (cpu_single_env->singlestep_enabled)
                handle = 1;
        } else {
            for (n = 0; n < 4; n++)
                if (arch_info->dr6 & (1 << n))
                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
                    case 0x0:
                        handle = 1;
                        break;
                    case 0x1:
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_WRITE;
                        break;
                    case 0x3:
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_ACCESS;
                        break;
                    }
        }
    } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc))
        handle = 1;

    if (!handle)
        kvm_update_guest_debug(cpu_single_env,
                        (arch_info->exception == 1) ?
                        KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);

    return handle;
}
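
/* Program the guest debug state: enable software and/or hardware breakpoints
 * and encode address, type and length of each hardware breakpoint into the
 * DR0-DR3/DR7 shadow registers. */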
void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg)
{
    const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int n;

    if (kvm_sw_breakpoints_active(env))
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;

    if (nb_hw_breakpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg->arch.debugreg[7] = 0x0600;
        for (n = 0; n < nb_hw_breakpoint; n++) {
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
                (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
                (len_code[hw_breakpoint[n].len] << (18 + n*4));
        }
    }
}
#endif /* KVM_CAP_SET_GUEST_DEBUG */