Revision 05330448

b/kvm-all.c
1
/*
2
 * QEMU KVM support
3
 *
4
 * Copyright IBM, Corp. 2008
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10
 * See the COPYING file in the top-level directory.
11
 *
12
 */
13

  
14
#include <sys/types.h>
15
#include <sys/ioctl.h>
16
#include <sys/mman.h>
17

  
18
#include <linux/kvm.h>
19

  
20
#include "qemu-common.h"
21
#include "sysemu.h"
22
#include "kvm.h"
23

  
24
/* Uncomment to send dprintf() tracing to stderr. */
//#define DEBUG_KVM

/* dprintf(): printf-style debug trace.  Expands to an empty statement
 * unless DEBUG_KVM is defined. */
#ifndef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { } while (0)
#else
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
33

  
34
typedef struct kvm_userspace_memory_region KVMSlot;
35

  
36
int kvm_allowed = 0;
37

  
38
struct KVMState
39
{
40
    KVMSlot slots[32];
41
    int fd;
42
    int vmfd;
43
};
44

  
45
static KVMState *kvm_state;
46

  
47
/*
 * Find an unused entry in the slot table.
 *
 * A slot counts as free when its memory_size is zero.  Returns the first
 * free slot, or NULL when every slot is in use.
 */
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    KVMSlot *slot = s->slots;
    KVMSlot *const end = s->slots + ARRAY_SIZE(s->slots);

    while (slot != end) {
        if (slot->memory_size == 0) {
            return slot;
        }
        slot++;
    }

    return NULL;
}
58

  
59
/*
 * Find the slot whose guest-physical range contains start_addr.
 *
 * The range of a slot is [guest_phys_addr, guest_phys_addr + memory_size),
 * so free slots (memory_size == 0) never match.  Returns the first matching
 * slot, or NULL when no slot covers the address.
 */
static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr)
{
    KVMSlot *slot;
    KVMSlot *const end = s->slots + ARRAY_SIZE(s->slots);

    for (slot = s->slots; slot != end; slot++) {
        if (start_addr < slot->guest_phys_addr) {
            continue;
        }
        if (start_addr < (slot->guest_phys_addr + slot->memory_size)) {
            return slot;
        }
    }

    return NULL;
}
73

  
74
/*
 * Create the KVM virtual CPU backing env and map its shared run area.
 *
 * On success env->kvm_fd holds the vcpu descriptor, env->kvm_state points at
 * the VM state and env->kvm_run at the mmap'ed struct kvm_run; the result of
 * kvm_arch_init_vcpu() is then returned.  On failure a negative errno value
 * is returned.
 */
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU,
                       (void *)(unsigned long)env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        /* ret still holds the (non-negative) vcpu fd here; replace it with
         * the error so the caller does not mistake failure for success. */
        ret = mmap_size;
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}
111

  
112
int kvm_init(int smp_cpus)
113
{
114
    KVMState *s;
115
    int ret;
116
    int i;
117

  
118
    if (smp_cpus > 1)
119
        return -EINVAL;
120

  
121
    s = qemu_mallocz(sizeof(KVMState));
122
    if (s == NULL)
123
        return -ENOMEM;
124

  
125
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
126
        s->slots[i].slot = i;
127

  
128
    s->vmfd = -1;
129
    s->fd = open("/dev/kvm", O_RDWR);
130
    if (s->fd == -1) {
131
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
132
        ret = -errno;
133
        goto err;
134
    }
135

  
136
    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
137
    if (ret < KVM_API_VERSION) {
138
        if (ret > 0)
139
            ret = -EINVAL;
140
        fprintf(stderr, "kvm version too old\n");
141
        goto err;
142
    }
143

  
144
    if (ret > KVM_API_VERSION) {
145
        ret = -EINVAL;
146
        fprintf(stderr, "kvm version not supported\n");
147
        goto err;
148
    }
149

  
150
    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
151
    if (s->vmfd < 0)
152
        goto err;
153

  
154
    /* initially, KVM allocated its own memory and we had to jump through
155
     * hooks to make phys_ram_base point to this.  Modern versions of KVM
156
     * just use a user allocated buffer so we can use phys_ram_base
157
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
158
     */
159
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY);
160
    if (ret <= 0) {
161
        if (ret == 0)
162
            ret = -EINVAL;
163
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
164
        goto err;
165
    }
166

  
167
    ret = kvm_arch_init(s, smp_cpus);
168
    if (ret < 0)
169
        goto err;
170

  
171
    kvm_state = s;
172

  
173
    return 0;
174

  
175
err:
176
    if (s) {
177
        if (s->vmfd != -1)
178
            close(s->vmfd);
179
        if (s->fd != -1)
180
            close(s->fd);
181
    }
182
    qemu_free(s);
183

  
184
    return ret;
185
}
186

  
187
/*
 * Replay a port I/O exit.
 *
 * data holds count consecutive items of size bytes each.  For
 * KVM_EXIT_IO_IN every item is filled from a port read; for any other
 * direction every item is written to the port.  Sizes other than 1, 2 or 4
 * transfer nothing but the data pointer still advances.  Always returns 1.
 */
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    const int is_input = (direction == KVM_EXIT_IO_IN);
    uint8_t *cursor = data;
    int n;

    for (n = 0; n < count; n++, cursor += size) {
        if (size == 1) {
            if (is_input)
                stb_p(cursor, cpu_inb(env, port));
            else
                cpu_outb(env, port, ldub_p(cursor));
        } else if (size == 2) {
            if (is_input)
                stw_p(cursor, cpu_inw(env, port));
            else
                cpu_outw(env, port, lduw_p(cursor));
        } else if (size == 4) {
            if (is_input)
                stl_p(cursor, cpu_inl(env, port));
            else
                cpu_outl(env, port, ldl_p(cursor));
        }
    }

    return 1;
}
225

  
226
/*
 * Run the guest on env's vcpu until an exit needs attention outside this
 * loop.
 *
 * Each iteration runs the arch pre-run hook, issues KVM_RUN, runs the arch
 * post-run hook and dispatches on the exit reason.  A handler result > 0
 * re-enters the guest; any other result ends the loop and is returned.
 * Returns 0 when an exit was requested via CPU_INTERRUPT_EXIT or when
 * KVM_RUN was interrupted (-EINTR / -EAGAIN).  Any other KVM_RUN failure
 * aborts the process.
 */
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    for (;;) {
        kvm_arch_pre_run(env, run);

        if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
            dprintf("interrupt exit requested\n");
            return 0;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            return 0;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        /* unless a handler says otherwise, leave the loop */
        ret = 0;
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        /* the following exits are only traced */
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }

        if (ret <= 0)
            return ret;
    }
}
303

  
304
void kvm_set_phys_mem(target_phys_addr_t start_addr,
305
                      ram_addr_t size,
306
                      ram_addr_t phys_offset)
307
{
308
    KVMState *s = kvm_state;
309
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
310
    KVMSlot *mem;
311

  
312
    /* KVM does not support read-only slots */
313
    phys_offset &= ~IO_MEM_ROM;
314

  
315
    mem = kvm_lookup_slot(s, start_addr);
316
    if (mem) {
317
        if (flags == IO_MEM_UNASSIGNED) {
318
            mem->memory_size = 0;
319
            mem->guest_phys_addr = start_addr;
320
            mem->userspace_addr = 0;
321
            mem->flags = 0;
322

  
323
            kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem);
324
        } else if (start_addr >= mem->guest_phys_addr &&
325
                   (start_addr + size) <= (mem->guest_phys_addr + mem->memory_size))
326
            return;
327
    }
328

  
329
    /* KVM does not need to know about this memory */
330
    if (flags >= IO_MEM_UNASSIGNED)
331
        return;
332

  
333
    mem = kvm_alloc_slot(s);
334
    mem->memory_size = size;
335
    mem->guest_phys_addr = start_addr;
336
    mem->userspace_addr = (unsigned long)(phys_ram_base + phys_offset);
337
    mem->flags = 0;
338

  
339
    kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem);
340
    /* FIXME deal with errors */
341
}
342

  
343
/*
 * Issue an ioctl on the /dev/kvm descriptor (s->fd).
 *
 * Returns the ioctl result, or -errno when the call returned -1.
 */
int kvm_ioctl(KVMState *s, int type, void *data)
{
    const int rc = ioctl(s->fd, type, data);

    return (rc == -1) ? -errno : rc;
}
353

  
354
/*
 * Issue an ioctl on the VM descriptor (s->vmfd).
 *
 * Returns the ioctl result, or -errno when the call returned -1.
 */
int kvm_vm_ioctl(KVMState *s, int type, void *data)
{
    const int rc = ioctl(s->vmfd, type, data);

    return (rc == -1) ? -errno : rc;
}
364

  
365
/*
 * Issue an ioctl on the vcpu descriptor (env->kvm_fd).
 *
 * Returns the ioctl result, or -errno when the call returned -1.
 */
int kvm_vcpu_ioctl(CPUState *env, int type, void *data)
{
    const int rc = ioctl(env->kvm_fd, type, data);

    return (rc == -1) ? -errno : rc;
}
b/kvm.h
1
/*
2
 * QEMU KVM support
3
 *
4
 * Copyright IBM, Corp. 2008
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10
 * See the COPYING file in the top-level directory.
11
 *
12
 */
13

  
14
#ifndef QEMU_KVM_H
#define QEMU_KVM_H

#include "config.h"

/* kvm_enabled() tells whether KVM may be used.  Without CONFIG_KVM it is
 * the constant 0. */
#ifndef CONFIG_KVM
#define kvm_enabled() (0)
#else
extern int kvm_allowed;

#define kvm_enabled() (kvm_allowed)
#endif

struct kvm_run;

/* Opaque VM state; the structure is defined by the KVM core code. */
struct KVMState;
typedef struct KVMState KVMState;

/* external API */

/* Set up the VM state; returns 0 or a negative errno value. */
int kvm_init(int smp_cpus);

/* Create the vcpu for env and initialise it. */
int kvm_init_vcpu(CPUState *env);

/* Run the guest on env's vcpu until an exit has to be handled elsewhere. */
int kvm_cpu_exec(CPUState *env);

/* Keep KVM's memory slots in step with a guest-physical registration. */
void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset);

/* internal API */

/* ioctl wrappers for the /dev/kvm, VM and vcpu descriptors respectively;
 * each returns the ioctl result, or -errno on failure. */
int kvm_ioctl(KVMState *s, int type, void *data);

int kvm_vm_ioctl(KVMState *s, int type, void *data);

int kvm_vcpu_ioctl(CPUState *env, int type, void *data);

/* Arch specific hooks */

int kvm_arch_init(KVMState *s, int smp_cpus);

int kvm_arch_init_vcpu(CPUState *env);

int kvm_arch_pre_run(CPUState *env, struct kvm_run *run);

int kvm_arch_post_run(CPUState *env, struct kvm_run *run);

int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run);

int kvm_arch_get_registers(CPUState *env);

int kvm_arch_put_registers(CPUState *env);

#endif
b/target-i386/kvm.c
1
/*
2
 * QEMU KVM support
3
 *
4
 * Copyright (C) 2006-2008 Qumranet Technologies
5
 * Copyright IBM, Corp. 2008
6
 *
7
 * Authors:
8
 *  Anthony Liguori   <aliguori@us.ibm.com>
9
 *
10
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
11
 * See the COPYING file in the top-level directory.
12
 *
13
 */
14

  
15
#include <errno.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
18

  
19
#include <linux/kvm.h>
20

  
21
#include "qemu-common.h"
22
#include "sysemu.h"
23
#include "kvm.h"
24
#include "cpu.h"
25

  
26
/* Uncomment to send dprintf() tracing to stderr. */
//#define DEBUG_KVM

/* dprintf(): printf-style debug trace.  Expands to an empty statement
 * unless DEBUG_KVM is defined. */
#ifndef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { } while (0)
#else
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
35

  
36
/*
 * Hand the CPUID table of env to KVM.
 *
 * Leaf 0 and leaf 0x80000000 are queried through cpu_x86_cpuid() for the
 * highest leaf of their range; every leaf from the range base up to that
 * limit is then copied into a table passed to KVM_SET_CPUID.
 *
 * The leaf counters are unsigned so that an extended limit below 0x80000000
 * yields an empty range, and the table is bounds-checked: more leaves than
 * the table can hold yields -EINVAL instead of overrunning the stack.
 *
 * Returns the result of the KVM_SET_CPUID ioctl, or -EINVAL as described.
 */
int kvm_arch_init_vcpu(CPUState *env)
{
    struct {
        struct kvm_cpuid cpuid;
        struct kvm_cpuid_entry entries[100];
    } __attribute__((packed)) cpuid_data;
    static const uint32_t range_base[] = { 0, 0x80000000 };
    const uint32_t max_entries =
        sizeof(cpuid_data.entries) / sizeof(cpuid_data.entries[0]);
    uint32_t limit, i, cpuid_i;
    uint32_t eax, ebx, ecx, edx;
    unsigned r;

    /* do not pass uninitialised padding to the kernel */
    memset(&cpuid_data, 0, sizeof(cpuid_data));

    cpuid_i = 0;

    for (r = 0; r < sizeof(range_base) / sizeof(range_base[0]); r++) {
        cpu_x86_cpuid(env, range_base[r], &eax, &ebx, &ecx, &edx);
        limit = eax;

        for (i = range_base[r]; i <= limit; i++) {
            struct kvm_cpuid_entry *c;

            if (cpuid_i >= max_entries) {
                fprintf(stderr, "kvm: too many cpuid entries\n");
                return -EINVAL;
            }
            c = &cpuid_data.entries[cpuid_i++];

            cpu_x86_cpuid(env, i, &eax, &ebx, &ecx, &edx);
            c->function = i;
            c->eax = eax;
            c->ebx = ebx;
            c->ecx = ecx;
            c->edx = edx;
        }
    }

    cpuid_data.cpuid.nent = cpuid_i;

    return kvm_vcpu_ioctl(env, KVM_SET_CPUID, &cpuid_data);
}
79

  
80
/*
 * Report whether KVM lists MSR_STAR among the MSRs it can save/restore.
 *
 * The answer is computed on the first call and cached in a function-local
 * static (0 = not probed yet, 1 = present, -1 = absent or probe failed).
 * Returns 1 when MSR_STAR is available, 0 otherwise.
 */
static int kvm_has_msr_star(CPUState *env)
{
    static int has_msr_star;
    int ret;

    /* first time */
    if (has_msr_star == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;
        unsigned i;

        has_msr_star = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore.  The first call passes room for no entries and is
         * expected to fail with E2BIG after nmsrs has been updated to the
         * required count. */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG)
            return 0;

        kvm_msr_list = qemu_mallocz(sizeof(msr_list) +
                                    msr_list.nmsrs * sizeof(msr_list.indices[0]));
        if (kvm_msr_list == NULL)
            return 0;

        /* tell the kernel how many indices the buffer can hold */
        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                }
            }
        }

        /* release with the allocator that qemu_mallocz() belongs to */
        qemu_free(kvm_msr_list);
    }

    if (has_msr_star == 1)
        return 1;
    return 0;
}
122

  
123
/*
 * x86 specific VM initialisation: require KVM_CAP_SET_TSS_ADDR and set the
 * address of the vm86 TSS.
 *
 * Returns the result of the KVM_SET_TSS_ADDR ioctl, a negative errno value
 * if the capability query failed, or -EINVAL if the capability is missing.
 */
int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
     * versions of KVM just assumed that it would be at the end of physical
     * memory but that doesn't work with more than 4GB of memory.  We simply
     * refuse to work with those older versions of KVM. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, (void *)KVM_CAP_SET_TSS_ADDR);
    if (ret <= 0) {
        /* 0 means "not supported"; returning it unchanged would look like
         * success to the caller */
        if (ret == 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
        return ret;
    }

    /* this address is 3 pages before the bios, and the bios should present
     * as unavailable memory.  FIXME, need to ensure the e820 map deals with
     * this?
     */
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, (void *)0xfffbd000);
}
145
                    
146
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
147
{
148
    lhs->selector = rhs->selector;
149
    lhs->base = rhs->base;
150
    lhs->limit = rhs->limit;
151
    lhs->type = 3;
152
    lhs->present = 1;
153
    lhs->dpl = 3;
154
    lhs->db = 0;
155
    lhs->s = 1;
156
    lhs->l = 0;
157
    lhs->g = 0;
158
    lhs->avl = 0;
159
    lhs->unusable = 0;
160
}
161

  
162
static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
163
{
164
    unsigned flags = rhs->flags;
165
    lhs->selector = rhs->selector;
166
    lhs->base = rhs->base;
167
    lhs->limit = rhs->limit;
168
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
169
    lhs->present = (flags & DESC_P_MASK) != 0;
170
    lhs->dpl = rhs->selector & 3;
171
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
172
    lhs->s = (flags & DESC_S_MASK) != 0;
173
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
174
    lhs->g = (flags & DESC_G_MASK) != 0;
175
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
176
    lhs->unusable = 0;
177
}
178

  
179
/*
 * Fill a QEMU segment cache from a KVM segment.
 *
 * Selector, base and limit are copied; the attribute fields of rhs are
 * packed back into descriptor flag bits in lhs->flags.
 */
static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->limit = rhs->limit;
    lhs->base = rhs->base;
    lhs->selector = rhs->selector;

    /* multi-bit and shifted fields */
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT);
    lhs->flags |= (rhs->dpl << DESC_DPL_SHIFT);
    lhs->flags |= (rhs->db << DESC_B_SHIFT);
    lhs->flags |= (rhs->l << DESC_L_SHIFT);

    /* single-bit fields expressed through their masks */
    lhs->flags |= (rhs->present * DESC_P_MASK);
    lhs->flags |= (rhs->s * DESC_S_MASK);
    lhs->flags |= (rhs->g * DESC_G_MASK);
    lhs->flags |= (rhs->avl * DESC_AVL_MASK);
}
194

  
195
/*
 * Copy one register between the KVM and QEMU representations.
 *
 * With set non-zero the QEMU value is written to *kvm_reg; otherwise the
 * KVM value is written to *qemu_reg.
 */
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (!set) {
        *qemu_reg = *kvm_reg;
        return;
    }

    *kvm_reg = *qemu_reg;
}
202

  
203
/*
 * Transfer the general purpose registers, flags and instruction pointer
 * between env and KVM.
 *
 * set == 0: read the registers with KVM_GET_REGS and store them in env.
 * set != 0: load the registers from env and write them with KVM_SET_REGS.
 *
 * Returns the ioctl result (negative errno value on failure).
 */
static int kvm_getput_regs(CPUState *env, int set)
{
    struct kvm_regs regs;
    int ret = 0;

    /* Without TARGET_X86_64 the r8-r15 members are never assigned below;
     * zero the structure so KVM_SET_REGS is not fed stack garbage. */
    memset(&regs, 0, sizeof regs);

    if (!set) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
        if (ret < 0)
            return ret;
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set)
        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);

    return ret;
}
241

  
242
/*
 * Write the FPU/SSE state of env to KVM with KVM_SET_FPU.
 *
 * The status word is env->fpus with bits 11-13 replaced by env->fpstt, and
 * each ftwx bit is set when the corresponding env->fptags entry is zero.
 * Returns the ioctl result.
 */
static int kvm_put_fpu(CPUState *env)
{
    struct kvm_fpu fpu;
    int reg;

    memset(&fpu, 0, sizeof fpu);

    fpu.fcw = env->fpuc;
    fpu.mxcsr = env->mxcsr;
    fpu.fsw = (env->fpus & ~(7 << 11)) | ((env->fpstt & 7) << 11);

    for (reg = 0; reg < 8; ++reg) {
        if (!env->fptags[reg])
            fpu.ftwx |= 1 << reg;
    }

    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);

    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
}
259

  
260
/*
 * Write the special registers of env to KVM with KVM_SET_SREGS: pending
 * interrupt bitmap, segment registers, TR/LDT, descriptor tables, control
 * registers, CR8/APIC base and EFER.
 *
 * The six data/code segments are converted with set_v8086_seg() when
 * EFLAGS.VM is set and with set_seg() otherwise.  Returns the ioctl result.
 */
static int kvm_put_sregs(CPUState *env)
{
    struct kvm_sregs sregs;
    const int vm86 = (env->eflags & VM_MASK) != 0;
    void (*load_seg)(struct kvm_segment *, const SegmentCache *) =
        vm86 ? set_v8086_seg : set_seg;

    memcpy(sregs.interrupt_bitmap,
           env->interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    load_seg(&sregs.cs, &env->segs[R_CS]);
    load_seg(&sregs.ds, &env->segs[R_DS]);
    load_seg(&sregs.es, &env->segs[R_ES]);
    load_seg(&sregs.fs, &env->segs[R_FS]);
    load_seg(&sregs.gs, &env->segs[R_GS]);
    load_seg(&sregs.ss, &env->segs[R_SS]);

    if (!vm86 && (env->cr[0] & CR0_PE_MASK)) {
        /* force ss cpl to cs cpl */
        sregs.ss.selector = (sregs.ss.selector & ~3) |
                            (sregs.cs.selector & 3);
        sregs.ss.dpl = sregs.ss.selector & 3;
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.gdt.base = env->gdt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.idt.base = env->idt.base;
    sregs.idt.limit = env->idt.limit;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
}
311

  
312
static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
313
                              uint32_t index, uint64_t value)
314
{
315
    entry->index = index;
316
    entry->data = value;
317
}
318

  
319
/*
 * Write the MSRs tracked in env to KVM with KVM_SET_MSRS.
 *
 * MSR_STAR is only included when kvm_has_msr_star() reports it; the
 * syscall-related 64-bit MSRs are only included on TARGET_X86_64 builds.
 * Returns the ioctl result.
 */
static int kvm_put_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *next = msr_data.entries;

    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(next++, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star(env)) {
        kvm_msr_entry_set(next++, MSR_STAR, env->star);
    }
    kvm_msr_entry_set(next++, MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    kvm_msr_entry_set(next++, MSR_CSTAR, env->cstar);
    kvm_msr_entry_set(next++, MSR_KERNELGSBASE, env->kernelgsbase);
    kvm_msr_entry_set(next++, MSR_FMASK, env->fmask);
    kvm_msr_entry_set(next++, MSR_LSTAR, env->lstar);
#endif
    /* number of entries filled in above */
    msr_data.info.nmsrs = next - msr_data.entries;

    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
}
346

  
347

  
348
/*
 * Read the FPU/SSE state from KVM with KVM_GET_FPU and store it in env.
 *
 * env->fpstt is taken from bits 11-13 of the status word, and each
 * env->fptags entry is the inverse of the corresponding ftwx bit.
 * Returns 0, or the negative ioctl result on failure.
 */
static int kvm_get_fpu(CPUState *env)
{
    struct kvm_fpu fpu;
    int reg;
    int ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);

    if (ret < 0) {
        return ret;
    }

    env->fpuc = fpu.fcw;
    env->fpus = fpu.fsw;
    env->fpstt = (fpu.fsw >> 11) & 7;
    env->mxcsr = fpu.mxcsr;

    for (reg = 0; reg < 8; ++reg) {
        env->fptags[reg] = !((fpu.ftwx >> reg) & 1);
    }

    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);

    return 0;
}
368

  
369
/*
 * Read the special registers from KVM with KVM_GET_SREGS and store them in
 * env, then recompute the state QEMU derives from them: the hflags bits
 * listed in HFLAG_COPY_MASK and the lazy condition-code fields (cc_src,
 * df, cc_op), whose bits are removed from env->eflags.
 *
 * CR8 is not written back to the APIC TPR here.
 * Returns 0, or the negative ioctl result on failure.
 */
static int kvm_get_sregs(CPUState *env)
{
    struct kvm_sregs sregs;
    uint32_t hflags;
    int ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    memcpy(env->interrupt_bitmap,
           sregs.interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    /* segment registers */
    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    /* descriptor tables */
    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    /* control registers */
    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
    //cpu_set_apic_tpr(env, sregs.cr8);

    /* hflags bits that are recomputed below; all others are kept */
#define HFLAG_COPY_MASK ~( \
			HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
			HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
			HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
			HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
              (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_SS32_SHIFT);

        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) ||
            !(hflags & HF_CS32_MASK)) {
            hflags |= HF_ADDSEG_MASK;
        } else {
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) <<
                      HF_ADDSEG_SHIFT;
        }
    }

    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;

    /* move the condition codes and direction flag out of eflags */
    env->cc_src = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
    env->df = 1 - (2 * ((env->eflags >> 10) & 1));
    env->cc_op = CC_OP_EFLAGS;
    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);

    return 0;
}
454

  
455
/*
 * Read the MSRs tracked in env from KVM with KVM_GET_MSRS.
 *
 * The request lists the same MSRs that kvm_put_msrs() writes.  The ioctl
 * result is used as the number of entries to copy back into env.
 * Returns 0, or the negative ioctl result on failure.
 */
static int kvm_get_msrs(CPUState *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *const msrs = msr_data.entries;
    struct kvm_msr_entry *next = msrs;
    int ret, i;

    (next++)->index = MSR_IA32_SYSENTER_CS;
    (next++)->index = MSR_IA32_SYSENTER_ESP;
    (next++)->index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env)) {
        (next++)->index = MSR_STAR;
    }
    (next++)->index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    (next++)->index = MSR_CSTAR;
    (next++)->index = MSR_KERNELGSBASE;
    (next++)->index = MSR_FMASK;
    (next++)->index = MSR_LSTAR;
#endif
    msr_data.info.nmsrs = next - msrs;

    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < ret; i++) {
        switch (msrs[i].index) {
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        case MSR_STAR:
            env->star = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].data;
            break;
#ifdef TARGET_X86_64
        case MSR_LSTAR:
            env->lstar = msrs[i].data;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].data;
            break;
        case MSR_CSTAR:
            env->cstar = msrs[i].data;
            break;
#endif
        }
    }

    return 0;
}
519

  
520
/*
 * Write the complete register state of env to KVM: general registers, FPU,
 * special registers and MSRs, in that order.
 *
 * Stops at the first step that fails and returns its negative result;
 * returns 0 when every step succeeded.
 */
int kvm_arch_put_registers(CPUState *env)
{
    int ret = kvm_getput_regs(env, 1);

    if (ret >= 0) {
        ret = kvm_put_fpu(env);
    }
    if (ret >= 0) {
        ret = kvm_put_sregs(env);
    }
    if (ret >= 0) {
        ret = kvm_put_msrs(env);
    }

    return (ret < 0) ? ret : 0;
}
542

  
543
/*
 * Read the complete register state from KVM into env: general registers,
 * FPU, special registers and MSRs, in that order.
 *
 * Stops at the first step that fails and returns its negative result;
 * returns 0 when every step succeeded.
 */
int kvm_arch_get_registers(CPUState *env)
{
    int ret = kvm_getput_regs(env, 0);

    if (ret >= 0) {
        ret = kvm_get_fpu(env);
    }
    if (ret >= 0) {
        ret = kvm_get_sregs(env);
    }
    if (ret >= 0) {
        ret = kvm_get_msrs(env);
    }

    return (ret < 0) ? ret : 0;
}
565

  
566
/*
 * Prepare the shared run structure before entering the guest.
 *
 * Injects a pending hard interrupt when the guest can take it, requests an
 * interrupt-window exit when a hard interrupt is still pending afterwards,
 * and copies the APIC TPR into run->cr8.  Always returns 0.
 */
int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
    /* Try to inject an interrupt if the guest can accept it */
    if (run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        int irq;

        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            struct kvm_interrupt intr;

            intr.irq = irq;
            /* FIXME: errors */
            dprintf("injected interrupt %d\n", irq);
            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    run->request_interrupt_window =
        (env->interrupt_request & CPU_INTERRUPT_HARD) ? 1 : 0;

    dprintf("setting tpr\n");
    run->cr8 = cpu_get_apic_tpr(env);

    return 0;
}
599

  
600
/*
 * Pick up state from the shared run structure after KVM_RUN returned:
 * mirror run->if_flag into EFLAGS.IF and pass run->cr8 and run->apic_base
 * to the APIC.  Always returns 0.
 */
int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
    env->eflags = run->if_flag ? (env->eflags | IF_MASK)
                               : (env->eflags & ~IF_MASK);

    cpu_set_apic_tpr(env, run->cr8);
    cpu_set_apic_base(env, run->apic_base);

    return 0;
}
612

  
613
/*
 * Handle a HLT exit.
 *
 * Returns 1 when a hard interrupt is pending with EFLAGS.IF set, or an NMI
 * is pending.  Otherwise marks the CPU as halted (env->halted,
 * exception_index = EXCP_HLT) and returns 0.
 */
static int kvm_handle_halt(CPUState *env)
{
    const int irq_deliverable =
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK);
    const int nmi_pending =
        (env->interrupt_request & CPU_INTERRUPT_NMI) != 0;

    if (irq_deliverable || nmi_pending) {
        return 1;
    }

    env->halted = 1;
    env->exception_index = EXCP_HLT;
    return 0;
}
625

  
626
/*
 * Handle the exit reasons left to the architecture code.
 *
 * KVM_EXIT_HLT is passed to kvm_handle_halt() and its result returned;
 * every other exit reason yields 0.
 */
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    if (run->exit_reason == KVM_EXIT_HLT) {
        dprintf("handle_hlt\n");
        return kvm_handle_halt(env);
    }

    return 0;
}

Also available in: Unified diff