target-i386/kvm.c @ 0d09e41a
/*
 * QEMU KVM support
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/utsname.h>

#include <linux/kvm.h>
#include <linux/kvm_para.h>

#include "qemu-common.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_i386.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "qemu/host-utils.h"
#include "qemu/config-file.h"
#include "hw/i386/pc.h"
#include "hw/i386/apic.h"
#include "exec/ioport.h"
#include "hyperv.h"
#include "hw/pci/pci.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define MSR_KVM_WALL_CLOCK  0x11
#define MSR_KVM_SYSTEM_TIME 0x12
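/*
 * These are the legacy kvmclock MSR numbers.  Newer kernels also expose the
 * same clocks at MSR numbers 0x4b564d00/0x4b564d01; only the legacy pair is
 * saved and restored in this file.
 */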

#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4
#endif
#ifndef BUS_MCEERR_AO
#define BUS_MCEERR_AO 5
#endif

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_INFO(SET_TSS_ADDR),
    KVM_CAP_INFO(EXT_CPUID),
    KVM_CAP_INFO(MP_STATE),
    KVM_CAP_LAST_INFO
};

static bool has_msr_star;
static bool has_msr_hsave_pa;
static bool has_msr_tsc_adjust;
static bool has_msr_tsc_deadline;
static bool has_msr_async_pf_en;
static bool has_msr_pv_eoi_en;
static bool has_msr_misc_enable;
static int lm_capable_kernel;

bool kvm_allows_irq0_override(void)
{
    return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
}

static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *cpuid;
    int r, size;

    size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
    cpuid = (struct kvm_cpuid2 *)g_malloc0(size);
    cpuid->nent = max;
    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
    if (r == 0 && cpuid->nent >= max) {
        r = -E2BIG;
    }
    if (r < 0) {
        if (r == -E2BIG) {
            g_free(cpuid);
            return NULL;
        } else {
            fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                    strerror(-r));
            exit(1);
        }
    }
    return cpuid;
}

/* Run KVM_GET_SUPPORTED_CPUID ioctl(), allocating a buffer large enough
 * for all entries.
 */
static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s)
{
    struct kvm_cpuid2 *cpuid;
    int max = 1;
    while ((cpuid = try_get_cpuid(s, max)) == NULL) {
        max *= 2;
    }
    return cpuid;
}
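/*
 * get_supported_cpuid() follows the usual KVM "guess and grow" pattern:
 * KVM_GET_SUPPORTED_CPUID reports -E2BIG (or fills the table completely,
 * which is treated the same way) when entries[] is too small, so the buffer
 * is doubled and the ioctl retried until everything fits.  A typical caller
 * looks like:
 *
 *     struct kvm_cpuid2 *cpuid = get_supported_cpuid(s);
 *     ... scan cpuid->entries[0 .. cpuid->nent) ...
 *     g_free(cpuid);
 */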

struct kvm_para_features {
    int cap;
    int feature;
} para_features[] = {
    { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE },
    { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
    { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
    { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
    { -1, -1 }
};

static int get_para_features(KVMState *s)
{
    int i, features = 0;

    for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
        if (kvm_check_extension(s, para_features[i].cap)) {
            features |= (1 << para_features[i].feature);
        }
    }

    return features;
}
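/*
 * get_para_features() rebuilds the KVM_CPUID_FEATURES (function 0x40000001)
 * EAX bitmap from individual capability checks, e.g. a kernel advertising
 * KVM_CAP_CLOCKSOURCE and KVM_CAP_ASYNC_PF yields
 * (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_ASYNC_PF).  It is only
 * used as a fallback for kernels whose GET_SUPPORTED_CPUID does not report
 * that leaf (see kvm_arch_get_supported_cpuid() below).
 */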


/* Returns the value of a specific register of a CPUID entry */
static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
{
    uint32_t ret = 0;
    switch (reg) {
    case R_EAX:
        ret = entry->eax;
        break;
    case R_EBX:
        ret = entry->ebx;
        break;
    case R_ECX:
        ret = entry->ecx;
        break;
    case R_EDX:
        ret = entry->edx;
        break;
    }
    return ret;
}

/* Find the entry matching function/index in a kvm_cpuid2 struct */
static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
                                                 uint32_t function,
                                                 uint32_t index)
{
    int i;
    for (i = 0; i < cpuid->nent; ++i) {
        if (cpuid->entries[i].function == function &&
            cpuid->entries[i].index == index) {
            return &cpuid->entries[i];
        }
    }
    /* not found: */
    return NULL;
}

uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
                                      uint32_t index, int reg)
{
    struct kvm_cpuid2 *cpuid;
    uint32_t ret = 0;
    uint32_t cpuid_1_edx;
    bool found = false;

    cpuid = get_supported_cpuid(s);

    struct kvm_cpuid_entry2 *entry = cpuid_find_entry(cpuid, function, index);
    if (entry) {
        found = true;
        ret = cpuid_entry_get_reg(entry, reg);
    }

    /* Fixups for the data returned by KVM, below */

    if (function == 1 && reg == R_EDX) {
        /* KVM before 2.6.30 misreports the following features */
        ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA;
    } else if (function == 1 && reg == R_ECX) {
        /* We can set the hypervisor flag, even if KVM does not return it on
         * GET_SUPPORTED_CPUID
         */
        ret |= CPUID_EXT_HYPERVISOR;
        /* tsc-deadline flag is not returned by GET_SUPPORTED_CPUID, but it
         * can be enabled if the kernel has KVM_CAP_TSC_DEADLINE_TIMER,
         * and the irqchip is in the kernel.
         */
        if (kvm_irqchip_in_kernel() &&
                kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
            ret |= CPUID_EXT_TSC_DEADLINE_TIMER;
        }

        /* x2apic is reported by GET_SUPPORTED_CPUID, but it can't be enabled
         * without the in-kernel irqchip
         */
        if (!kvm_irqchip_in_kernel()) {
            ret &= ~CPUID_EXT_X2APIC;
        }
    } else if (function == 0x80000001 && reg == R_EDX) {
        /* On Intel, kvm returns cpuid according to the Intel spec,
         * so add missing bits according to the AMD spec:
         */
        cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
        ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
    }

    g_free(cpuid);

    /* fallback for older kernels */
    if ((function == KVM_CPUID_FEATURES) && !found) {
        ret = get_para_features(s);
    }

    return ret;
}
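/*
 * Typical use (a sketch, not the only caller): mask a requested feature set
 * against what the host can virtualize, e.g.
 *
 *     if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_X2APIC) {
 *         ... the x2apic bit may be exposed to the guest ...
 *     }
 *
 * The per-feature-word filtering of CPU models lives in the CPU model code
 * (target-i386/cpu.c).
 */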

typedef struct HWPoisonPage {
    ram_addr_t ram_addr;
    QLIST_ENTRY(HWPoisonPage) list;
} HWPoisonPage;

static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
    QLIST_HEAD_INITIALIZER(hwpoison_page_list);

static void kvm_unpoison_all(void *param)
{
    HWPoisonPage *page, *next_page;

    QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
        QLIST_REMOVE(page, list);
        qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
        g_free(page);
    }
}

static void kvm_hwpoison_page_add(ram_addr_t ram_addr)
{
    HWPoisonPage *page;

    QLIST_FOREACH(page, &hwpoison_page_list, list) {
        if (page->ram_addr == ram_addr) {
            return;
        }
    }
    page = g_malloc(sizeof(HWPoisonPage));
    page->ram_addr = ram_addr;
    QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
}

static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
                                     int *max_banks)
{
    int r;

    r = kvm_check_extension(s, KVM_CAP_MCE);
    if (r > 0) {
        *max_banks = r;
        return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
    }
    return -ENOSYS;
}

static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
{
    CPUX86State *env = &cpu->env;
    uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
                      MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
    uint64_t mcg_status = MCG_STATUS_MCIP;

    if (code == BUS_MCEERR_AR) {
        status |= MCI_STATUS_AR | 0x134;
        mcg_status |= MCG_STATUS_EIPV;
    } else {
        status |= 0xc0;
        mcg_status |= MCG_STATUS_RIPV;
    }
    cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
                       (MCM_ADDR_PHYS << 6) | 0xc,
                       cpu_x86_support_mca_broadcast(env) ?
                       MCE_INJECT_BROADCAST : 0);
}

static void hardware_memory_error(void)
{
    fprintf(stderr, "Hardware memory error!\n");
    exit(1);
}
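/*
 * SIGBUS si_code values used by the handlers below: BUS_MCEERR_AR ("action
 * required") means poisoned memory was actually consumed, so an uncorrected
 * MCE must be forwarded to the guest or QEMU has to give up; BUS_MCEERR_AO
 * ("action optional") means the kernel only detected a bad page, so the
 * report may be dropped if it cannot be delivered.
 */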

int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
    X86CPU *cpu = X86_CPU(c);
    CPUX86State *env = &cpu->env;
    ram_addr_t ram_addr;
    hwaddr paddr;

    if ((env->mcg_cap & MCG_SER_P) && addr
        && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
        if (qemu_ram_addr_from_host(addr, &ram_addr) ||
            !kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
            fprintf(stderr, "Hardware memory error for memory used by "
                    "QEMU itself instead of guest system!\n");
            /* Hope we are lucky for AO MCE */
            if (code == BUS_MCEERR_AO) {
                return 0;
            } else {
                hardware_memory_error();
            }
        }
        kvm_hwpoison_page_add(ram_addr);
        kvm_mce_inject(cpu, paddr, code);
    } else {
        if (code == BUS_MCEERR_AO) {
            return 0;
        } else if (code == BUS_MCEERR_AR) {
            hardware_memory_error();
        } else {
            return 1;
        }
    }
    return 0;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
        ram_addr_t ram_addr;
        hwaddr paddr;

        /* Hope we are lucky for AO MCE */
        if (qemu_ram_addr_from_host(addr, &ram_addr) ||
            !kvm_physical_memory_addr_from_host(CPU(first_cpu)->kvm_state,
                                                addr, &paddr)) {
            fprintf(stderr, "Hardware memory error for memory used by "
                    "QEMU itself instead of guest system!: %p\n", addr);
            return 0;
        }
        kvm_hwpoison_page_add(ram_addr);
        kvm_mce_inject(x86_env_get_cpu(first_cpu), paddr, code);
    } else {
        if (code == BUS_MCEERR_AO) {
            return 0;
        } else if (code == BUS_MCEERR_AR) {
            hardware_memory_error();
        } else {
            return 1;
        }
    }
    return 0;
}

static int kvm_inject_mce_oldstyle(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;

    if (!kvm_has_vcpu_events() && env->exception_injected == EXCP12_MCHK) {
        unsigned int bank, bank_num = env->mcg_cap & 0xff;
        struct kvm_x86_mce mce;

        env->exception_injected = -1;

        /*
         * There must be at least one bank in use if an MCE is pending.
         * Find it and use its values for the event injection.
         */
        for (bank = 0; bank < bank_num; bank++) {
            if (env->mce_banks[bank * 4 + 1] & MCI_STATUS_VAL) {
                break;
            }
        }
        assert(bank < bank_num);

        mce.bank = bank;
        mce.status = env->mce_banks[bank * 4 + 1];
        mce.mcg_status = env->mcg_status;
        mce.addr = env->mce_banks[bank * 4 + 2];
        mce.misc = env->mce_banks[bank * 4 + 3];

        return kvm_vcpu_ioctl(CPU(cpu), KVM_X86_SET_MCE, &mce);
    }
    return 0;
}

static void cpu_update_state(void *opaque, int running, RunState state)
{
    CPUX86State *env = opaque;

    if (running) {
        env->tsc_valid = false;
    }
}

unsigned long kvm_arch_vcpu_id(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    return cpu->env.cpuid_apic_id;
}

#define KVM_MAX_CPUID_ENTRIES  100
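/*
 * kvm_arch_init_vcpu() builds the KVM_SET_CPUID2 table on the stack.  struct
 * kvm_cpuid2 ends in a flexible array member, so the anonymous wrapper struct
 * below simply places a fixed entries[] array behind the header; QEMU_PACKED
 * keeps the two adjacent.  KVM_MAX_CPUID_ENTRIES is an arbitrary but generous
 * limit, and the code aborts if it would be exceeded.
 */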

int kvm_arch_init_vcpu(CPUState *cs)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
    } QEMU_PACKED cpuid_data;
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint32_t limit, i, j, cpuid_i;
    uint32_t unused;
    struct kvm_cpuid_entry2 *c;
    uint32_t signature[3];
    int r;

    cpuid_i = 0;

    /* Paravirtualization CPUIDs */
    c = &cpuid_data.entries[cpuid_i++];
    memset(c, 0, sizeof(*c));
    c->function = KVM_CPUID_SIGNATURE;
    if (!hyperv_enabled()) {
        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
        c->eax = 0;
    } else {
        memcpy(signature, "Microsoft Hv", 12);
        c->eax = HYPERV_CPUID_MIN;
    }
    c->ebx = signature[0];
    c->ecx = signature[1];
    c->edx = signature[2];

    c = &cpuid_data.entries[cpuid_i++];
    memset(c, 0, sizeof(*c));
    c->function = KVM_CPUID_FEATURES;
    c->eax = env->cpuid_kvm_features;

    if (hyperv_enabled()) {
        memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
        c->eax = signature[0];

        c = &cpuid_data.entries[cpuid_i++];
        memset(c, 0, sizeof(*c));
        c->function = HYPERV_CPUID_VERSION;
        c->eax = 0x00001bbc;
        c->ebx = 0x00060001;

        c = &cpuid_data.entries[cpuid_i++];
        memset(c, 0, sizeof(*c));
        c->function = HYPERV_CPUID_FEATURES;
        if (hyperv_relaxed_timing_enabled()) {
            c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
        }
        if (hyperv_vapic_recommended()) {
            c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
            c->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
        }

        c = &cpuid_data.entries[cpuid_i++];
        memset(c, 0, sizeof(*c));
        c->function = HYPERV_CPUID_ENLIGHTMENT_INFO;
        if (hyperv_relaxed_timing_enabled()) {
            c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
        }
        if (hyperv_vapic_recommended()) {
            c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
        }
        c->ebx = hyperv_get_spinlock_retries();

        c = &cpuid_data.entries[cpuid_i++];
        memset(c, 0, sizeof(*c));
        c->function = HYPERV_CPUID_IMPLEMENT_LIMITS;
        c->eax = 0x40;
        c->ebx = 0x40;

        c = &cpuid_data.entries[cpuid_i++];
        memset(c, 0, sizeof(*c));
        c->function = KVM_CPUID_SIGNATURE_NEXT;
        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
        c->eax = 0;
        c->ebx = signature[0];
        c->ecx = signature[1];
        c->edx = signature[2];
    }
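    /*
     * Summary of the paravirtual leaves built above: with Hyper-V
     * enlightenments the 0x40000000 range carries the "Microsoft Hv"
     * identification, the "Hv#1" interface id, version, features,
     * recommendations and implementation limits, and the KVM signature moves
     * up to KVM_CPUID_SIGNATURE_NEXT (0x40000100) so Linux guests can still
     * find kvmclock.  Without Hyper-V the KVM signature sits directly at
     * KVM_CPUID_SIGNATURE (0x40000000).
     */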

    has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);

    has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);

    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
            fprintf(stderr, "unsupported level value: 0x%x\n", limit);
            abort();
        }
        c = &cpuid_data.entries[cpuid_i++];

        switch (i) {
        case 2: {
            /* Keep reading function 2 until all the input is received */
            int times;

            c->function = i;
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            times = c->eax & 0xff;

            for (j = 1; j < times; ++j) {
                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    fprintf(stderr, "cpuid_data is full, no space for "
                            "cpuid(eax:2):eax & 0xff = 0x%x\n", times);
                    abort();
                }
                c = &cpuid_data.entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 4:
        case 0xb:
        case 0xd:
            for (j = 0; ; j++) {
                if (i == 0xd && j == 64) {
                    break;
                }
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0) {
                    break;
                }
                if (i == 0xb && !(c->ecx & 0xff00)) {
                    break;
                }
                if (i == 0xd && c->eax == 0) {
                    continue;
                }
                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                    fprintf(stderr, "cpuid_data is full, no space for "
                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
                    abort();
                }
                c = &cpuid_data.entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            break;
        }
    }
    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
            fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
            abort();
        }
        c = &cpuid_data.entries[cpuid_i++];

        c->function = i;
        c->flags = 0;
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
    }

    /* Call Centaur's CPUID instructions if they are supported. */
    if (env->cpuid_xlevel2 > 0) {
        cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);

        for (i = 0xC0000000; i <= limit; i++) {
            if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
                fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
                abort();
            }
            c = &cpuid_data.entries[cpuid_i++];

            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
        }
    }

    cpuid_data.cpuid.nent = cpuid_i;

    if (((env->cpuid_version >> 8)&0xF) >= 6
        && (env->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)
        && kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) {
        uint64_t mcg_cap;
        int banks;
        int ret;

        ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks);
        if (ret < 0) {
            fprintf(stderr, "kvm_get_mce_cap_supported: %s", strerror(-ret));
            return ret;
        }

        if (banks > MCE_BANKS_DEF) {
            banks = MCE_BANKS_DEF;
        }
        mcg_cap &= MCE_CAP_DEF;
        mcg_cap |= banks;
        ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &mcg_cap);
        if (ret < 0) {
            fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret));
            return ret;
        }

        env->mcg_cap = mcg_cap;
    }

    qemu_add_vm_change_state_handler(cpu_update_state, env);

    cpuid_data.cpuid.padding = 0;
    r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
    if (r) {
        return r;
    }

    r = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL);
    if (r && env->tsc_khz) {
        r = kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz);
        if (r < 0) {
            fprintf(stderr, "KVM_SET_TSC_KHZ failed\n");
            return r;
        }
    }

    if (kvm_has_xsave()) {
        env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
    }

    return 0;
}

void kvm_arch_reset_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->exception_injected = -1;
    env->interrupt_injected = -1;
    env->xcr0 = 1;
    if (kvm_irqchip_in_kernel()) {
        env->mp_state = cpu_is_bsp(cpu) ? KVM_MP_STATE_RUNNABLE :
                                          KVM_MP_STATE_UNINITIALIZED;
    } else {
        env->mp_state = KVM_MP_STATE_RUNNABLE;
    }
}

static int kvm_get_supported_msrs(KVMState *s)
{
    static int kvm_supported_msrs;
    int ret = 0;

    /* first time */
    if (kvm_supported_msrs == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        kvm_supported_msrs = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG) {
            return ret;
        }
        /* Old kernel modules had a bug and could write beyond the provided
           memory. Allocate at least a safe amount of 1K. */
        kvm_msr_list = g_malloc0(MAX(1024, sizeof(msr_list) +
                                              msr_list.nmsrs *
                                              sizeof(msr_list.indices[0])));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            int i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
                    has_msr_hsave_pa = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_TSC_ADJUST) {
                    has_msr_tsc_adjust = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_IA32_TSCDEADLINE) {
                    has_msr_tsc_deadline = true;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_IA32_MISC_ENABLE) {
                    has_msr_misc_enable = true;
                    continue;
                }
            }
        }

        g_free(kvm_msr_list);
    }

    return ret;
}
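/*
 * The KVM_GET_MSR_INDEX_LIST sequence above is the usual two-call protocol:
 * the first call, with nmsrs = 0, fails with -E2BIG but fills in how many
 * MSRs the kernel wants saved/restored; the second call, against a buffer
 * sized from that count, returns the actual index array, which is then only
 * scanned for the MSRs QEMU treats as optional.
 */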

int kvm_arch_init(KVMState *s)
{
    QemuOptsList *list = qemu_find_opts("machine");
    uint64_t identity_base = 0xfffbc000;
    uint64_t shadow_mem;
    int ret;
    struct utsname utsname;

    ret = kvm_get_supported_msrs(s);
    if (ret < 0) {
        return ret;
    }

    uname(&utsname);
    lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;

    /*
     * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly.
     * In order to use vm86 mode, an EPT identity map and a TSS are needed.
     * Since these must be part of guest physical memory, we need to allocate
     * them, both by setting their start addresses in the kernel and by
     * creating a corresponding e820 entry. We need 4 pages before the BIOS.
     *
     * Older KVM versions may not support setting the identity map base. In
     * that case we need to stick with the default, i.e. a 256K maximum BIOS
     * size.
     */
    if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
        /* Allows up to 16M BIOSes. */
        identity_base = 0xfeffc000;

        ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base);
        if (ret < 0) {
            return ret;
        }
    }

    /* Set TSS base one page after EPT identity map. */
    ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000);
    if (ret < 0) {
        return ret;
    }

    /* Tell fw_cfg to notify the BIOS to reserve the range. */
    ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED);
    if (ret < 0) {
        fprintf(stderr, "e820_add_entry() table is full\n");
        return ret;
    }
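    /*
     * Resulting guest-physical layout (a sketch, assuming the identity-map
     * capability is present):
     *
     *   0xfeffc000  EPT identity-map page             (1 page)
     *   0xfeffd000  TSS area used for vm86 emulation  (3 pages)
     *   0xff000000  lowest address of a BIOS of up to 16M below 4G
     *
     * hence the 0x4000 bytes reserved via e820 above.  Without the
     * capability the same four pages sit at the 0xfffbc000 default.
     */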
    qemu_register_reset(kvm_unpoison_all, NULL);

    if (!QTAILQ_EMPTY(&list->head)) {
        shadow_mem = qemu_opt_get_size(QTAILQ_FIRST(&list->head),
                                       "kvm_shadow_mem", -1);
        if (shadow_mem != -1) {
            shadow_mem /= 4096;
            ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem);
            if (ret < 0) {
                return ret;
            }
        }
    }
    return 0;
}

static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->db = 0;
    lhs->s = 1;
    lhs->l = 0;
    lhs->g = 0;
    lhs->avl = 0;
    lhs->unusable = 0;
}

static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3;
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
    lhs->s = (flags & DESC_S_MASK) != 0;
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
    lhs->g = (flags & DESC_G_MASK) != 0;
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
    lhs->unusable = 0;
    lhs->padding = 0;
}

static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT) |
                 (rhs->present * DESC_P_MASK) |
                 (rhs->dpl << DESC_DPL_SHIFT) |
                 (rhs->db << DESC_B_SHIFT) |
                 (rhs->s * DESC_S_MASK) |
                 (rhs->l << DESC_L_SHIFT) |
                 (rhs->g * DESC_G_MASK) |
                 (rhs->avl * DESC_AVL_MASK);
}
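/*
 * kvm_getput_reg()/kvm_getput_regs() below handle both transfer directions
 * with one helper: set != 0 copies QEMU state into the kvm_regs structure
 * that is then pushed with KVM_SET_REGS, set == 0 copies a freshly fetched
 * KVM_GET_REGS result back into the QEMU register file.
 */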

static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (set) {
        *kvm_reg = *qemu_reg;
    } else {
        *qemu_reg = *kvm_reg;
    }
}

static int kvm_getput_regs(X86CPU *cpu, int set)
{
    CPUX86State *env = &cpu->env;
    struct kvm_regs regs;
    int ret = 0;

    if (!set) {
        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_REGS, &regs);
        if (ret < 0) {
            return ret;
        }
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set) {
        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_REGS, &regs);
    }

    return ret;
}

static int kvm_put_fpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_fpu fpu;
    int i;

    memset(&fpu, 0, sizeof fpu);
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;
    fpu.last_opcode = env->fpop;
    fpu.last_ip = env->fpip;
    fpu.last_dp = env->fpdp;
    for (i = 0; i < 8; ++i) {
        fpu.ftwx |= (!env->fptags[i]) << i;
    }
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
    fpu.mxcsr = env->mxcsr;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
}

#define XSAVE_FCW_FSW     0
#define XSAVE_FTW_FOP     1
#define XSAVE_CWD_RIP     2
#define XSAVE_CWD_RDP     4
#define XSAVE_MXCSR       6
#define XSAVE_ST_SPACE    8
#define XSAVE_XMM_SPACE   40
#define XSAVE_XSTATE_BV   128
#define XSAVE_YMMH_SPACE  144
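/*
 * The XSAVE_* values are offsets in 32-bit words into kvm_xsave.region[],
 * mirroring the hardware XSAVE area layout: e.g. XSAVE_ST_SPACE (8) is byte
 * offset 32 where the x87 registers start, XSAVE_XSTATE_BV (128) is byte
 * offset 512, the start of the XSAVE header, and XSAVE_YMMH_SPACE (144) is
 * byte offset 576, the AVX high-half state.
 */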

static int kvm_put_xsave(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_xsave* xsave = env->kvm_xsave_buf;
    uint16_t cwd, swd, twd;
    int i, r;

    if (!kvm_has_xsave()) {
        return kvm_put_fpu(cpu);
    }

    memset(xsave, 0, sizeof(struct kvm_xsave));
    twd = 0;
    swd = env->fpus & ~(7 << 11);
    swd |= (env->fpstt & 7) << 11;
    cwd = env->fpuc;
    for (i = 0; i < 8; ++i) {
        twd |= (!env->fptags[i]) << i;
    }
    xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
    memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
    memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
    memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
            sizeof env->fpregs);
    memcpy(&xsave->region[XSAVE_XMM_SPACE], env->xmm_regs,
            sizeof env->xmm_regs);
    xsave->region[XSAVE_MXCSR] = env->mxcsr;
    *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
    memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
            sizeof env->ymmh_regs);
    r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
    return r;
}

static int kvm_put_xcrs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_xcrs xcrs;

    if (!kvm_has_xcrs()) {
        return 0;
    }

    xcrs.nr_xcrs = 1;
    xcrs.flags = 0;
    xcrs.xcrs[0].xcr = 0;
    xcrs.xcrs[0].value = env->xcr0;
    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XCRS, &xcrs);
}

static int kvm_put_sregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_sregs sregs;

    memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
    if (env->interrupt_injected >= 0) {
        sregs.interrupt_bitmap[env->interrupt_injected / 64] |=
                (uint64_t)1 << (env->interrupt_injected % 64);
    }

    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    memset(sregs.idt.padding, 0, sizeof sregs.idt.padding);
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;
    memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding);

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
    sregs.apic_base = cpu_get_apic_base(env->apic_state);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
                              uint32_t index, uint64_t value)
{
    entry->index = index;
    entry->data = value;
}

static int kvm_put_msrs(X86CPU *cpu, int level)
{
    CPUX86State *env = &cpu->env;
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int n = 0;

    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    kvm_msr_entry_set(&msrs[n++], MSR_PAT, env->pat);
    if (has_msr_star) {
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    }
    if (has_msr_hsave_pa) {
        kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
    }
    if (has_msr_tsc_adjust) {
        kvm_msr_entry_set(&msrs[n++], MSR_TSC_ADJUST, env->tsc_adjust);
    }
    if (has_msr_tsc_deadline) {
        kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSCDEADLINE, env->tsc_deadline);
    }
    if (has_msr_misc_enable) {
        kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE,
                          env->msr_ia32_misc_enable);
    }
#ifdef TARGET_X86_64
    if (lm_capable_kernel) {
        kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
        kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
        kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
        kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    }
#endif
    if (level == KVM_PUT_FULL_STATE) {
        /*
         * KVM is not yet able to synchronize TSC values of multiple VCPUs on
         * writeback. Until this is fixed, we only write the offset to SMP
         * guests after migration, desynchronizing the VCPUs, but avoiding
         * huge jump-backs that would occur without any writeback at all.
         */
        if (smp_cpus == 1 || env->tsc != 0) {
            kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
        }
    }
    /*
     * The following paravirtual MSRs have side effects on the guest or are
     * too heavy for normal writeback. Limit them to reset or full state
     * updates.
     */
    if (level >= KVM_PUT_RESET_STATE) {
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
                          env->system_time_msr);
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
        if (has_msr_async_pf_en) {
            kvm_msr_entry_set(&msrs[n++], MSR_KVM_ASYNC_PF_EN,
                              env->async_pf_en_msr);
        }
        if (has_msr_pv_eoi_en) {
            kvm_msr_entry_set(&msrs[n++], MSR_KVM_PV_EOI_EN,
                              env->pv_eoi_en_msr);
        }
        if (hyperv_hypercall_available()) {
            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0);
        }
        if (hyperv_vapic_recommended()) {
            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0);
        }
    }
    if (env->mcg_cap) {
        int i;

        kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
        kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
        for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
            kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
        }
    }

    msr_data.info.nmsrs = n;

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
}

static int kvm_get_fpu(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_fpu fpu;
    int i, ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_FPU, &fpu);
    if (ret < 0) {
        return ret;
    }

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    env->fpop = fpu.last_opcode;
    env->fpip = fpu.last_ip;
    env->fpdp = fpu.last_dp;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
    }
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
    env->mxcsr = fpu.mxcsr;

    return 0;
}

static int kvm_get_xsave(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_xsave* xsave = env->kvm_xsave_buf;
    int ret, i;
    uint16_t cwd, swd, twd;

    if (!kvm_has_xsave()) {
        return kvm_get_fpu(cpu);
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
    if (ret < 0) {
        return ret;
    }

    cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
    swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
    twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
    env->fpstt = (swd >> 11) & 7;
    env->fpus = swd;
    env->fpuc = cwd;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((twd >> i) & 1);
    }
    memcpy(&env->fpip, &xsave->region[XSAVE_CWD_RIP], sizeof(env->fpip));
    memcpy(&env->fpdp, &xsave->region[XSAVE_CWD_RDP], sizeof(env->fpdp));
    env->mxcsr = xsave->region[XSAVE_MXCSR];
    memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
            sizeof env->fpregs);
    memcpy(env->xmm_regs, &xsave->region[XSAVE_XMM_SPACE],
            sizeof env->xmm_regs);
    env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
    memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
            sizeof env->ymmh_regs);
    return 0;
}

static int kvm_get_xcrs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    int i, ret;
    struct kvm_xcrs xcrs;

    if (!kvm_has_xcrs()) {
        return 0;
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XCRS, &xcrs);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < xcrs.nr_xcrs; i++) {
        /* Only support xcr0 now */
        if (xcrs.xcrs[0].xcr == 0) {
            env->xcr0 = xcrs.xcrs[0].value;
            break;
        }
    }
    return 0;
}

static int kvm_get_sregs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_sregs sregs;
    uint32_t hflags;
    int bit, i, ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* There can only be one pending IRQ set in the bitmap at a time, so try
       to find it and save its number instead (-1 for none). */
    env->interrupt_injected = -1;
    for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) {
        if (sregs.interrupt_bitmap[i]) {
            bit = ctz64(sregs.interrupt_bitmap[i]);
            env->interrupt_injected = i * 64 + bit;
            break;
        }
    }

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    env->efer = sregs.efer;

    /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */

#define HFLAG_COPY_MASK \
    ~( HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
       HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
       HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
       HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
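    /*
     * hflags is QEMU's cached summary of the CPU mode (CPL, protected/long
     * mode, default operand sizes, ...).  KVM does not keep it, so it is
     * recomputed below from the segment registers, control registers and
     * EFER that were just fetched; HFLAG_COPY_MASK preserves the bits that
     * are not derived here (e.g. the interrupt-shadow and SVM related ones).
     */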

    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
                (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
                (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                    (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                    (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK) ||
            !(hflags & HF_CS32_MASK)) {
            hflags |= HF_ADDSEG_MASK;
        } else {
            hflags |= ((env->segs[R_DS].base | env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
        }
    }
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;

    return 0;
}

static int kvm_get_msrs(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    msrs[n++].index = MSR_PAT;
    if (has_msr_star) {
        msrs[n++].index = MSR_STAR;
    }
    if (has_msr_hsave_pa) {
        msrs[n++].index = MSR_VM_HSAVE_PA;
    }
    if (has_msr_tsc_adjust) {
        msrs[n++].index = MSR_TSC_ADJUST;
    }
    if (has_msr_tsc_deadline) {
        msrs[n++].index = MSR_IA32_TSCDEADLINE;
    }
    if (has_msr_misc_enable) {
        msrs[n++].index = MSR_IA32_MISC_ENABLE;
    }

    if (!env->tsc_valid) {
        msrs[n++].index = MSR_IA32_TSC;
        env->tsc_valid = !runstate_is_running();
    }

#ifdef TARGET_X86_64
    if (lm_capable_kernel) {
        msrs[n++].index = MSR_CSTAR;
        msrs[n++].index = MSR_KERNELGSBASE;
        msrs[n++].index = MSR_FMASK;
        msrs[n++].index = MSR_LSTAR;
    }
#endif
    msrs[n++].index = MSR_KVM_SYSTEM_TIME;
    msrs[n++].index = MSR_KVM_WALL_CLOCK;
    if (has_msr_async_pf_en) {
        msrs[n++].index = MSR_KVM_ASYNC_PF_EN;
    }
    if (has_msr_pv_eoi_en) {
        msrs[n++].index = MSR_KVM_PV_EOI_EN;
    }

    if (env->mcg_cap) {
        msrs[n++].index = MSR_MCG_STATUS;
        msrs[n++].index = MSR_MCG_CTL;
        for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
            msrs[n++].index = MSR_MC0_CTL + i;
        }
    }

    msr_data.info.nmsrs = n;
    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < ret; i++) {
        switch (msrs[i].index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].data;
            break;
        case MSR_PAT:
            env->pat = msrs[i].data;
            break;
        case MSR_STAR:
            env->star = msrs[i].data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = msrs[i].data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].data;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].data;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].data;
            break;
#endif
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        case MSR_TSC_ADJUST:
            env->tsc_adjust = msrs[i].data;
            break;
        case MSR_IA32_TSCDEADLINE:
            env->tsc_deadline = msrs[i].data;
            break;
        case MSR_VM_HSAVE_PA:
            env->vm_hsave = msrs[i].data;
            break;
        case MSR_KVM_SYSTEM_TIME:
            env->system_time_msr = msrs[i].data;
            break;
        case MSR_KVM_WALL_CLOCK:
            env->wall_clock_msr = msrs[i].data;
            break;
        case MSR_MCG_STATUS:
            env->mcg_status = msrs[i].data;
            break;
        case MSR_MCG_CTL:
            env->mcg_ctl = msrs[i].data;
            break;
        case MSR_IA32_MISC_ENABLE:
            env->msr_ia32_misc_enable = msrs[i].data;
            break;
        default:
            if (msrs[i].index >= MSR_MC0_CTL &&
                msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
                env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
            }
            break;
        case MSR_KVM_ASYNC_PF_EN:
            env->async_pf_en_msr = msrs[i].data;
            break;
        case MSR_KVM_PV_EOI_EN:
            env->pv_eoi_en_msr = msrs[i].data;
            break;
        }
    }

    return 0;
}

static int kvm_put_mp_state(X86CPU *cpu)
{
    struct kvm_mp_state mp_state = { .mp_state = cpu->env.mp_state };

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
}

static int kvm_get_mp_state(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    if (kvm_irqchip_in_kernel()) {
        cs->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED);
    }
    return 0;
}

static int kvm_get_apic(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    DeviceState *apic = env->apic_state;
    struct kvm_lapic_state kapic;
    int ret;

    if (apic && kvm_irqchip_in_kernel()) {
        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_LAPIC, &kapic);
        if (ret < 0) {
            return ret;
        }

        kvm_get_apic_state(apic, &kapic);
    }
    return 0;
}

static int kvm_put_apic(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    DeviceState *apic = env->apic_state;
    struct kvm_lapic_state kapic;

    if (apic && kvm_irqchip_in_kernel()) {
        kvm_put_apic_state(apic, &kapic);

        return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_LAPIC, &kapic);
    }
    return 0;
}

static int kvm_put_vcpu_events(X86CPU *cpu, int level)
{
    CPUX86State *env = &cpu->env;
    struct kvm_vcpu_events events;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    events.exception.injected = (env->exception_injected >= 0);
    events.exception.nr = env->exception_injected;
    events.exception.has_error_code = env->has_error_code;
    events.exception.error_code = env->error_code;
    events.exception.pad = 0;

    events.interrupt.injected = (env->interrupt_injected >= 0);
    events.interrupt.nr = env->interrupt_injected;
    events.interrupt.soft = env->soft_interrupt;

    events.nmi.injected = env->nmi_injected;
    events.nmi.pending = env->nmi_pending;
    events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
    events.nmi.pad = 0;

    events.sipi_vector = env->sipi_vector;

    events.flags = 0;
    if (level >= KVM_PUT_RESET_STATE) {
        events.flags |=
            KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
}

static int kvm_get_vcpu_events(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    struct kvm_vcpu_events events;
    int ret;

    if (!kvm_has_vcpu_events()) {
        return 0;
    }

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
    if (ret < 0) {
        return ret;
    }
    env->exception_injected =
        events.exception.injected ? events.exception.nr : -1;
    env->has_error_code = events.exception.has_error_code;
    env->error_code = events.exception.error_code;

    env->interrupt_injected =
        events.interrupt.injected ? events.interrupt.nr : -1;
    env->soft_interrupt = events.interrupt.soft;

    env->nmi_injected = events.nmi.injected;
    env->nmi_pending = events.nmi.pending;
    if (events.nmi.masked) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }

    env->sipi_vector = events.sipi_vector;

    return 0;
}

static int kvm_guest_debug_workarounds(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    int ret = 0;
    unsigned long reinject_trap = 0;

    if (!kvm_has_vcpu_events()) {
        if (env->exception_injected == 1) {
            reinject_trap = KVM_GUESTDBG_INJECT_DB;
        } else if (env->exception_injected == 3) {
            reinject_trap = KVM_GUESTDBG_INJECT_BP;
        }
        env->exception_injected = -1;
    }

    /*
     * Kernels before KVM_CAP_X86_ROBUST_SINGLESTEP overwrote flags.TF
     * injected via SET_GUEST_DEBUG while updating GP regs. Work around this
     * by updating the debug state once again if single-stepping is on.
     * Another reason to call kvm_update_guest_debug here is a pending debug
     * trap raised by the guest. On kernels without SET_VCPU_EVENTS we have to
     * reinject them via SET_GUEST_DEBUG.
     */
    if (reinject_trap ||
        (!kvm_has_robust_singlestep() && env->singlestep_enabled)) {
        ret = kvm_update_guest_debug(env, reinject_trap);
    }
    return ret;
}
1611

    
1612
static int kvm_put_debugregs(X86CPU *cpu)
1613
{
1614
    CPUX86State *env = &cpu->env;
1615
    struct kvm_debugregs dbgregs;
1616
    int i;
1617

    
1618
    if (!kvm_has_debugregs()) {
1619
        return 0;
1620
    }
1621

    
1622
    for (i = 0; i < 4; i++) {
1623
        dbgregs.db[i] = env->dr[i];
1624
    }
1625
    dbgregs.dr6 = env->dr[6];
1626
    dbgregs.dr7 = env->dr[7];
1627
    dbgregs.flags = 0;
1628

    
1629
    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEBUGREGS, &dbgregs);
1630
}
1631

    
1632
static int kvm_get_debugregs(X86CPU *cpu)
1633
{
1634
    CPUX86State *env = &cpu->env;
1635
    struct kvm_debugregs dbgregs;
1636
    int i, ret;
1637

    
1638
    if (!kvm_has_debugregs()) {
1639
        return 0;
1640
    }
1641

    
1642
    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEBUGREGS, &dbgregs);
1643
    if (ret < 0) {
1644
        return ret;
1645
    }
1646
    for (i = 0; i < 4; i++) {
1647
        env->dr[i] = dbgregs.db[i];
1648
    }
1649
    env->dr[4] = env->dr[6] = dbgregs.dr6;
1650
    env->dr[5] = env->dr[7] = dbgregs.dr7;
1651

    
1652
    return 0;
1653
}
1654

    
1655
int kvm_arch_put_registers(CPUState *cpu, int level)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int ret;

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    ret = kvm_getput_regs(x86_cpu, 1);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_xsave(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_xcrs(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_sregs(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    /* must be before kvm_put_msrs */
    ret = kvm_inject_mce_oldstyle(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_msrs(x86_cpu, level);
    if (ret < 0) {
        return ret;
    }
    if (level >= KVM_PUT_RESET_STATE) {
        ret = kvm_put_mp_state(x86_cpu);
        if (ret < 0) {
            return ret;
        }
        ret = kvm_put_apic(x86_cpu);
        if (ret < 0) {
            return ret;
        }
    }
    ret = kvm_put_vcpu_events(x86_cpu, level);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_put_debugregs(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    /* must be last */
    ret = kvm_guest_debug_workarounds(x86_cpu);
    if (ret < 0) {
        return ret;
    }
    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    int ret;

    assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs));

    ret = kvm_getput_regs(cpu, 0);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_xsave(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_xcrs(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_sregs(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_msrs(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_mp_state(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_apic(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_vcpu_events(cpu);
    if (ret < 0) {
        return ret;
    }
    ret = kvm_get_debugregs(cpu);
    if (ret < 0) {
        return ret;
    }
    return 0;
}

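/* Runs on the vCPU thread right before KVM_RUN. With a userspace irqchip,
 * the pending PIC interrupt is injected here when the guest can accept it
 * (IF set and the run struct reports ready_for_interrupt_injection);
 * otherwise an interrupt-window exit is requested. NMIs are injected
 * unconditionally via KVM_NMI, and CR8 mirrors the APIC TPR.
 */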
void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret;

    /* Inject NMI */
    if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
        DPRINTF("injected NMI\n");
        ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
        if (ret < 0) {
            fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
                    strerror(-ret));
        }
    }

    if (!kvm_irqchip_in_kernel()) {
        /* Force the VCPU out of its inner loop to process any INIT requests
         * or pending TPR access reports. */
        if (cpu->interrupt_request &
            (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
            cpu->exit_request = 1;
        }

        /* Try to inject an interrupt if the guest can accept it */
        if (run->ready_for_interrupt_injection &&
            (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
            (env->eflags & IF_MASK)) {
            int irq;

            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            irq = cpu_get_pic_interrupt(env);
            if (irq >= 0) {
                struct kvm_interrupt intr;

                intr.irq = irq;
                DPRINTF("injected interrupt %d\n", irq);
                ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
                if (ret < 0) {
                    fprintf(stderr,
                            "KVM: injection failed, interrupt lost (%s)\n",
                            strerror(-ret));
                }
            }
        }

        /* If we have an interrupt but the guest is not ready to receive an
         * interrupt, request an interrupt window exit.  This will
         * cause a return to userspace as soon as the guest is ready to
         * receive interrupts. */
        if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
            run->request_interrupt_window = 1;
        } else {
            run->request_interrupt_window = 0;
        }

        DPRINTF("setting tpr\n");
        run->cr8 = cpu_get_apic_tpr(env->apic_state);
    }
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    if (run->if_flag) {
        env->eflags |= IF_MASK;
    } else {
        env->eflags &= ~IF_MASK;
    }
    cpu_set_apic_tpr(env->apic_state, run->cr8);
    cpu_set_apic_base(env->apic_state, run->apic_base);
}

int kvm_arch_process_async_events(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    if (cs->interrupt_request & CPU_INTERRUPT_MCE) {
        /* We must not raise CPU_INTERRUPT_MCE if it's not supported. */
        assert(env->mcg_cap);

        cs->interrupt_request &= ~CPU_INTERRUPT_MCE;

        kvm_cpu_synchronize_state(env);

        if (env->exception_injected == EXCP08_DBLE) {
            /* this means triple fault */
            qemu_system_reset_request();
            cs->exit_request = 1;
            return 0;
        }
        env->exception_injected = EXCP12_MCHK;
        env->has_error_code = 0;

        cs->halted = 0;
        if (kvm_irqchip_in_kernel() && env->mp_state == KVM_MP_STATE_HALTED) {
            env->mp_state = KVM_MP_STATE_RUNNABLE;
        }
    }

    if (kvm_irqchip_in_kernel()) {
        return 0;
    }

    if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
        cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(env->apic_state);
    }
    if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
        cs->halted = 0;
    }
    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
        kvm_cpu_synchronize_state(env);
        do_cpu_init(cpu);
    }
    if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
        kvm_cpu_synchronize_state(env);
        do_cpu_sipi(cpu);
    }
    if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
        cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
        kvm_cpu_synchronize_state(env);
        apic_handle_tpr_access_report(env->apic_state, env->eip,
                                      env->tpr_access_type);
    }

    return cs->halted;
}

static int kvm_handle_halt(X86CPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUX86State *env = &cpu->env;

    if (!((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cs->interrupt_request & CPU_INTERRUPT_NMI)) {
        cs->halted = 1;
        return EXCP_HLT;
    }

    return 0;
}

static int kvm_handle_tpr_access(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_run *run = cs->kvm_run;

    apic_handle_tpr_access_report(env->apic_state, run->tpr_access.rip,
                                  run->tpr_access.is_write ? TPR_ACCESS_WRITE
                                                           : TPR_ACCESS_READ);
    return 1;
}

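/* Software breakpoints are planted by saving the first byte of the target
 * instruction and overwriting it with INT3 (0xcc); removal checks that the
 * byte is still 0xcc before restoring the saved instruction byte.
 */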
int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    CPUX86State *env = &X86_CPU(cpu)->env;
    static const uint8_t int3 = 0xcc;

    if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) {
        return -EINVAL;
    }
    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    CPUX86State *env = &X86_CPU(cpu)->env;
    uint8_t int3;

    if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
        cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
        return -EINVAL;
    }
    return 0;
}

static struct {
    target_ulong addr;
    int len;
    int type;
} hw_breakpoint[4];

static int nb_hw_breakpoint;

static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int n;

    for (n = 0; n < nb_hw_breakpoint; n++) {
        if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
            (hw_breakpoint[n].len == len || len == -1)) {
            return n;
        }
    }
    return -1;
}

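/* x86 provides four debug address registers (DR0-DR3), so at most four
 * hardware breakpoints/watchpoints can be armed at a time. Watchpoints of
 * 2, 4 or 8 bytes must be naturally aligned; execution breakpoints always
 * use a length of 1.
 */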
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        len = 1;
        break;
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        switch (len) {
        case 1:
            break;
        case 2:
        case 4:
        case 8:
            if (addr & (len - 1)) {
                return -EINVAL;
            }
            break;
        default:
            return -EINVAL;
        }
        break;
    default:
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4) {
        return -ENOBUFS;
    }
    if (find_hw_breakpoint(addr, len, type) >= 0) {
        return -EEXIST;
    }
    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].type = type;
    nb_hw_breakpoint++;

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
    if (n < 0) {
        return -ENOENT;
    }
    nb_hw_breakpoint--;
    hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = 0;
}

static CPUWatchpoint hw_watchpoint;

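/* Decode a KVM_EXIT_DEBUG exit. In DR6, bit 14 (BS) flags a single-step
 * trap and bits 0-3 flag which of DR0-DR3 triggered; the matching R/W
 * field in DR7 (bits 16+4n) distinguishes execution breakpoints (0) from
 * write (1) and read/write (3) watchpoints. Debug exits not caused by our
 * own breakpoints are reinjected into the guest.
 */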
static int kvm_handle_debug(X86CPU *cpu,
                            struct kvm_debug_exit_arch *arch_info)
{
    CPUX86State *env = &cpu->env;
    int ret = 0;
    int n;

    if (arch_info->exception == 1) {
        if (arch_info->dr6 & (1 << 14)) {
            if (env->singlestep_enabled) {
                ret = EXCP_DEBUG;
            }
        } else {
            for (n = 0; n < 4; n++) {
                if (arch_info->dr6 & (1 << n)) {
                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
                    case 0x0:
                        ret = EXCP_DEBUG;
                        break;
                    case 0x1:
                        ret = EXCP_DEBUG;
                        env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_WRITE;
                        break;
                    case 0x3:
                        ret = EXCP_DEBUG;
                        env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_ACCESS;
                        break;
                    }
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(CPU(cpu), arch_info->pc)) {
        ret = EXCP_DEBUG;
    }
    if (ret == 0) {
        cpu_synchronize_state(env);
        assert(env->exception_injected == -1);

        /* pass to guest */
        env->exception_injected = arch_info->exception;
        env->has_error_code = 0;
    }

    return ret;
}

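/* Build the DR7 image KVM programs for hardware breakpoints: 0x0600 sets
 * GE (global exact) plus the must-be-one reserved bit 10, (2 << (n * 2))
 * is the global-enable bit for slot n, and each slot's type and length
 * codes go into bits 16+4n and 18+4n (length code 0x2 selects 8 bytes).
 */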
void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
    const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int n;

    if (kvm_sw_breakpoints_active(cpu)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }
    if (nb_hw_breakpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg->arch.debugreg[7] = 0x0600;
        for (n = 0; n < nb_hw_breakpoint; n++) {
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
                (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
                ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4));
        }
    }
}

static bool host_supports_vmx(void)
{
    uint32_t ecx, unused;

    host_cpuid(1, 0, &unused, &unused, &ecx, &unused);
    return ecx & CPUID_EXT_VMX;
}

#define VMX_INVALID_GUEST_STATE 0x80000021

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    X86CPU *cpu = X86_CPU(cs);
    uint64_t code;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_HLT:
        DPRINTF("handle_hlt\n");
        ret = kvm_handle_halt(cpu);
        break;
    case KVM_EXIT_SET_TPR:
        ret = 0;
        break;
    case KVM_EXIT_TPR_ACCESS:
        ret = kvm_handle_tpr_access(cpu);
        break;
    case KVM_EXIT_FAIL_ENTRY:
        code = run->fail_entry.hardware_entry_failure_reason;
        fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n",
                code);
        if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) {
            fprintf(stderr,
                    "\nIf you're running a guest on an Intel machine without "
                        "unrestricted mode\n"
                    "support, the failure is most likely due to the guest "
                        "entering an invalid\n"
                    "state for Intel VT. For example, the guest may be running "
                        "in big real mode\n"
                    "which is not supported on older Intel processors."
                        "\n\n");
        }
        ret = -1;
        break;
    case KVM_EXIT_EXCEPTION:
        fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n",
                run->ex.exception, run->ex.error_code);
        ret = -1;
        break;
    case KVM_EXIT_DEBUG:
        DPRINTF("kvm_exit_debug\n");
        ret = kvm_handle_debug(cpu, &run->debug.arch);
        break;
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    kvm_cpu_synchronize_state(env);
    return !(env->cr[0] & CR0_PE_MASK) ||
           ((env->segs[R_CS].selector & 3) != 3);
}

void kvm_arch_init_irq_routing(KVMState *s)
{
    if (!kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
        /* If the kernel can't do IRQ routing, the interrupt source
         * override 0->2 required by HPET cannot be set up, so HPET
         * has to be disabled.
         */
        no_hpet = 1;
    }
    /* We know at this point that we're using the in-kernel
     * irqchip, so we can use irqfds, and on x86 we know
     * we can use MSI via irqfd and GSI routing.
     */
    kvm_irqfds_allowed = true;
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_routing_allowed = true;
}

/* Classic KVM device assignment interface. Will remain x86 only. */
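/* The assigned_dev_id handed to the kernel encodes the host PCI address as
 * domain << 16 | bus << 8 | devfn; the same id is then used to refer to the
 * device in the other assignment ioctls below.
 */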
int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr,
                          uint32_t flags, uint32_t *dev_id)
{
    struct kvm_assigned_pci_dev dev_data = {
        .segnr = dev_addr->domain,
        .busnr = dev_addr->bus,
        .devfn = PCI_DEVFN(dev_addr->slot, dev_addr->function),
        .flags = flags,
    };
    int ret;

    dev_data.assigned_dev_id =
        (dev_addr->domain << 16) | (dev_addr->bus << 8) | dev_data.devfn;

    ret = kvm_vm_ioctl(s, KVM_ASSIGN_PCI_DEVICE, &dev_data);
    if (ret < 0) {
        return ret;
    }

    *dev_id = dev_data.assigned_dev_id;

    return 0;
}

int kvm_device_pci_deassign(KVMState *s, uint32_t dev_id)
{
    struct kvm_assigned_pci_dev dev_data = {
        .assigned_dev_id = dev_id,
    };

    return kvm_vm_ioctl(s, KVM_DEASSIGN_PCI_DEVICE, &dev_data);
}

static int kvm_assign_irq_internal(KVMState *s, uint32_t dev_id,
                                   uint32_t irq_type, uint32_t guest_irq)
{
    struct kvm_assigned_irq assigned_irq = {
        .assigned_dev_id = dev_id,
        .guest_irq = guest_irq,
        .flags = irq_type,
    };

    if (kvm_check_extension(s, KVM_CAP_ASSIGN_DEV_IRQ)) {
        return kvm_vm_ioctl(s, KVM_ASSIGN_DEV_IRQ, &assigned_irq);
    } else {
        return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, &assigned_irq);
    }
}

int kvm_device_intx_assign(KVMState *s, uint32_t dev_id, bool use_host_msi,
                           uint32_t guest_irq)
{
    uint32_t irq_type = KVM_DEV_IRQ_GUEST_INTX |
        (use_host_msi ? KVM_DEV_IRQ_HOST_MSI : KVM_DEV_IRQ_HOST_INTX);

    return kvm_assign_irq_internal(s, dev_id, irq_type, guest_irq);
}

int kvm_device_intx_set_mask(KVMState *s, uint32_t dev_id, bool masked)
{
    struct kvm_assigned_pci_dev dev_data = {
        .assigned_dev_id = dev_id,
        .flags = masked ? KVM_DEV_ASSIGN_MASK_INTX : 0,
    };

    return kvm_vm_ioctl(s, KVM_ASSIGN_SET_INTX_MASK, &dev_data);
}

static int kvm_deassign_irq_internal(KVMState *s, uint32_t dev_id,
                                     uint32_t type)
{
    struct kvm_assigned_irq assigned_irq = {
        .assigned_dev_id = dev_id,
        .flags = type,
    };

    return kvm_vm_ioctl(s, KVM_DEASSIGN_DEV_IRQ, &assigned_irq);
}

int kvm_device_intx_deassign(KVMState *s, uint32_t dev_id, bool use_host_msi)
{
    return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_INTX |
        (use_host_msi ? KVM_DEV_IRQ_HOST_MSI : KVM_DEV_IRQ_HOST_INTX));
}

int kvm_device_msi_assign(KVMState *s, uint32_t dev_id, int virq)
{
    return kvm_assign_irq_internal(s, dev_id, KVM_DEV_IRQ_HOST_MSI |
                                              KVM_DEV_IRQ_GUEST_MSI, virq);
}

int kvm_device_msi_deassign(KVMState *s, uint32_t dev_id)
{
    return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSI |
                                                KVM_DEV_IRQ_HOST_MSI);
}

bool kvm_device_msix_supported(KVMState *s)
{
    /* The kernel lacks a corresponding KVM_CAP, so we probe by calling
     * KVM_ASSIGN_SET_MSIX_NR with an invalid parameter. */
    return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_NR, NULL) == -EFAULT;
}

int kvm_device_msix_init_vectors(KVMState *s, uint32_t dev_id,
                                 uint32_t nr_vectors)
{
    struct kvm_assigned_msix_nr msix_nr = {
        .assigned_dev_id = dev_id,
        .entry_nr = nr_vectors,
    };

    return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_NR, &msix_nr);
}

int kvm_device_msix_set_vector(KVMState *s, uint32_t dev_id, uint32_t vector,
                               int virq)
{
    struct kvm_assigned_msix_entry msix_entry = {
        .assigned_dev_id = dev_id,
        .gsi = virq,
        .entry = vector,
    };

    return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_ENTRY, &msix_entry);
}

int kvm_device_msix_assign(KVMState *s, uint32_t dev_id)
{
    return kvm_assign_irq_internal(s, dev_id, KVM_DEV_IRQ_HOST_MSIX |
                                              KVM_DEV_IRQ_GUEST_MSIX, 0);
}

int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id)
{
    return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSIX |
                                                KVM_DEV_IRQ_HOST_MSIX);
}