/* kqemu.c — QEMU KQEMU accelerator support (revision 1eed09cb) */

/*
 *  KQEMU support
 *
 *  Copyright (c) 2005-2008 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
 */
20
#include "config.h"
21
#ifdef _WIN32
22
#include <windows.h>
23
#include <winioctl.h>
24
#else
25
#include <sys/types.h>
26
#include <sys/mman.h>
27
#include <sys/ioctl.h>
28
#endif
29
#ifdef HOST_SOLARIS
30
#include <sys/ioccom.h>
31
#endif
32
#include <stdlib.h>
33
#include <stdio.h>
34
#include <stdarg.h>
35
#include <string.h>
36
#include <errno.h>
37
#include <unistd.h>
38
#include <inttypes.h>
39

    
40
#include "cpu.h"
41
#include "exec-all.h"
42
#include "qemu-common.h"
43

    
44
#ifdef CONFIG_KQEMU
45

    
46
#define DEBUG
47
//#define PROFILE
48

    
49

    
50
#ifdef DEBUG
51
#  define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
52
#  define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
53
#else
54
#  define LOG_INT(...) do { } while (0)
55
#  define LOG_INT_STATE(env) do { } while (0)
56
#endif
57

    
58
#include <unistd.h>
59
#include <fcntl.h>
60
#include "kqemu.h"
61

    
62
#ifdef _WIN32
63
#define KQEMU_DEVICE "\\\\.\\kqemu"
64
#else
65
#define KQEMU_DEVICE "/dev/kqemu"
66
#endif
67

    
68
static void qpi_init(void);
69

    
70
#ifdef _WIN32
71
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
72
HANDLE kqemu_fd = KQEMU_INVALID_FD;
73
#define kqemu_closefd(x) CloseHandle(x)
74
#else
75
#define KQEMU_INVALID_FD -1
76
int kqemu_fd = KQEMU_INVALID_FD;
77
#define kqemu_closefd(x) close(x)
78
#endif
79

    
80
/* 0 = not allowed
   1 = user kqemu
   2 = kernel kqemu
*/
int kqemu_allowed = 1;

/* batched TLB flush requests shared with the kernel module */
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;

/* RAM pages whose dirty state must be resynchronized */
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;

/* RAM pages modified by the guest since the last flush; the table is a
   1-byte-per-page membership flag indexed by page number */
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;
uint8_t *modified_ram_pages_table;

/* io-memory index registered for the QPI communication page */
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */

/* guest RAM geometry handed to the kernel module at init time */
ram_addr_t kqemu_phys_ram_size;
uint8_t *kqemu_phys_ram_base;
96

    
97
/* Execute CPUID with EAX = index and capture all four result registers. */
#define cpuid(index, eax, ebx, ecx, edx) \
  asm volatile ("cpuid" \
                : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
                : "0" (index))

#ifdef __x86_64__
/* Every x86_64 CPU supports CPUID. */
static int is_cpuid_supported(void)
{
    return 1;
}
#else
/* Detect CPUID support on ia32: CPUID exists iff the ID bit (bit 21)
   of EFLAGS can be toggled by software.  Push/pop EFLAGS with the bit
   flipped and check whether the change sticks. */
static int is_cpuid_supported(void)
{
    int v0, v1;
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (v0), "=d" (v1)
                  :
                  : "cc");
    return (v0 != v1);
}
#endif
125

    
126
/* Align the guest's advertised CPUID feature bits with the host CPU for
   the features user code can observe directly (SSE & friends), since
   kqemu runs that code natively on the host. */
static void kqemu_update_cpuid(CPUState *env)
{
    int critical_features_mask, features, ext_features, ext_features_mask;
    uint32_t eax, ebx, ecx, edx;

    /* the following features are kept identical on the host and
       target cpus because they are important for user code. Strictly
       speaking, only SSE really matters because the OS must support
       it if the user code uses it. */
    critical_features_mask =
        CPUID_CMOV | CPUID_CX8 |
        CPUID_FXSR | CPUID_MMX | CPUID_SSE |
        CPUID_SSE2 | CPUID_SEP;
    ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
    if (!is_cpuid_supported()) {
        /* ancient host CPU: report none of the critical features */
        features = 0;
        ext_features = 0;
    } else {
        /* CPUID leaf 1: EDX = standard features, ECX = extended features */
        cpuid(1, eax, ebx, ecx, edx);
        features = edx;
        ext_features = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
       compatibility mode, so in order to have the best performances
       it is better not to use it */
    features &= ~CPUID_SEP;
#endif
    /* splice the host's critical bits into the guest's feature words */
    env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
        (features & critical_features_mask);
    env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
        (ext_features & ext_features_mask);
    /* XXX: we could update more of the target CPUID state so that the
       non accelerated code sees exactly the same CPU features as the
       accelerated code */
}
162

    
163
/* Open the kqemu device, check the module/QEMU protocol version,
   allocate the shared communication buffers and hand them to the
   kernel module via KQEMU_INIT.
   Returns 0 on success, -1 on any failure (kqemu stays disabled). */
int kqemu_init(CPUState *env)
{
    struct kqemu_init kinit;
    int ret, version;
#ifdef _WIN32
    DWORD temp;
#endif

    if (!kqemu_allowed)
        return -1;

#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
                KQEMU_DEVICE, GetLastError());
        return -1;
    }
#else
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
        return -1;
    }
#endif
    /* refuse to run against a module built for a different protocol */
    version = 0;
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
#endif
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

    /* buffers shared with the kernel module; qemu_vmalloc gives
       page-aligned memory the module can map */
    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
                                  sizeof(uint64_t));
    if (!pages_to_flush)
        goto fail;

    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
                                       sizeof(uint64_t));
    if (!ram_pages_to_update)
        goto fail;

    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
                                      sizeof(uint64_t));
    if (!modified_ram_pages)
        goto fail;
    /* one membership byte per guest RAM page */
    modified_ram_pages_table =
        qemu_mallocz(kqemu_phys_ram_size >> TARGET_PAGE_BITS);
    if (!modified_ram_pages_table)
        goto fail;

    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    kinit.ram_base = kqemu_phys_ram_base;
    kinit.ram_size = kqemu_phys_ram_size;
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
#endif
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
    /* NOTE: the 'fail' label deliberately sits inside this if-body so
       earlier failures share the close-and-disable path below */
    fail:
        kqemu_closefd(kqemu_fd);
        kqemu_fd = KQEMU_INVALID_FD;
        return -1;
    }
    kqemu_update_cpuid(env);
    env->kqemu_enabled = kqemu_allowed;
    nb_pages_to_flush = 0;
    nb_ram_pages_to_update = 0;

    qpi_init();
    return 0;
}
252

    
253
/* Queue a single guest-virtual page for TLB invalidation in the kqemu
   module.  When the batch buffer is full, degrade to a full flush by
   storing the KQEMU_FLUSH_ALL sentinel in the counter. */
void kqemu_flush_page(CPUState *env, target_ulong addr)
{
    LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    if (nb_pages_to_flush < KQEMU_MAX_PAGES_TO_FLUSH) {
        pages_to_flush[nb_pages_to_flush] = addr;
        nb_pages_to_flush++;
    } else {
        nb_pages_to_flush = KQEMU_FLUSH_ALL;
    }
}
261

    
262
/* Request a full TLB flush on the next kqemu execution; the 'global'
   argument is ignored, a complete flush is always performed. */
void kqemu_flush(CPUState *env, int global)
{
    LOG_INT("kqemu_flush:\n");
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
267

    
268
/* Record that a RAM page left the fully-dirty state so the kernel
   module can resynchronize its dirty bitmap.  Overflowing the batch
   buffer degrades to the "update everything" sentinel. */
void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
    LOG_INT("kqemu_set_notdirty: addr=%08lx\n", 
                (unsigned long)ram_addr);
    /* we only track transitions to dirty state */
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] == 0xff) {
        if (nb_ram_pages_to_update < KQEMU_MAX_RAM_PAGES_TO_UPDATE) {
            ram_pages_to_update[nb_ram_pages_to_update] = ram_addr;
            nb_ram_pages_to_update++;
        } else {
            nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
        }
    }
}
280

    
281
static void kqemu_reset_modified_ram_pages(void)
282
{
283
    int i;
284
    unsigned long page_index;
285

    
286
    for(i = 0; i < nb_modified_ram_pages; i++) {
287
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
288
        modified_ram_pages_table[page_index] = 0;
289
    }
290
    nb_modified_ram_pages = 0;
291
}
292

    
293
/* Note that the guest modified a RAM page (so cached translations for
   it become stale).  Each page is recorded once; when the batch fills
   up, the list is pushed to the kernel module and reset. */
void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index;
    int ret;  /* ioctl result; currently not acted upon */
#ifdef _WIN32
    DWORD temp;
#endif

    page_index = ram_addr >> TARGET_PAGE_BITS;
    if (!modified_ram_pages_table[page_index]) {
#if 0
        printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
#endif
        /* first modification of this page since the last flush */
        modified_ram_pages_table[page_index] = 1;
        modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
        if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
            /* flush */
#ifdef _WIN32
            ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                                  &nb_modified_ram_pages,
                                  sizeof(nb_modified_ram_pages),
                                  NULL, 0, &temp, NULL);
#else
            ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                        &nb_modified_ram_pages);
#endif
            kqemu_reset_modified_ram_pages();
        }
    }
}
323

    
324
/* Inform the kqemu kernel module how the guest-physical range
   [start_addr, start_addr + size) is backed (RAM, ROM, the QPI
   communication page, or unassigned).  The range is widened to whole
   target pages before being sent. */
void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size, 
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
    uint64_t end;
    int ret, io_index;

    /* round end up and start down to page boundaries */
    end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;
    kphys_mem->phys_addr = start_addr;
    kphys_mem->size = end - start_addr;
    kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
    /* the sub-page bits of phys_offset carry the io-memory index */
    io_index = phys_offset & ~TARGET_PAGE_MASK;
    switch(io_index) {
    case IO_MEM_RAM:
        kphys_mem->io_index = KQEMU_IO_MEM_RAM;
        break;
    case IO_MEM_ROM:
        kphys_mem->io_index = KQEMU_IO_MEM_ROM;
        break;
    default:
        if (qpi_io_memory == io_index) {
            /* the QPI communication page registered in qpi_init() */
            kphys_mem->io_index = KQEMU_IO_MEM_COMM;
        } else {
            kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
        }
        break;
    }
#ifdef _WIN32
    {
        DWORD temp;
        ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM, 
                              kphys_mem, sizeof(*kphys_mem),
                              NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    }
#else
    ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
#endif
    if (ret < 0) {
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                ret, start_addr, 
                (unsigned long)size, (unsigned long)phys_offset);
    }
}
368

    
369
/* Memory image used by the FSAVE/FRSTOR instructions (legacy x87 save
   area).  Field order matches the hardware layout; the dummy fields
   are the unused high halves of the 32-bit slots. */
struct fpstate {
    uint16_t fpuc;   /* x87 control word */
    uint16_t dummy1;
    uint16_t fpus;   /* x87 status word (TOP in bits 13..11) */
    uint16_t dummy2;
    uint16_t fptag;  /* full tag word, 2 bits per register */
    uint16_t dummy3;

    uint32_t fpip;   /* last instruction pointer */
    uint32_t fpcs;   /* last instruction CS selector */
    uint32_t fpoo;   /* last operand offset */
    uint32_t fpos;   /* last operand selector */
    uint8_t fpregs1[8 * 10];  /* ST(0)..ST(7), 80 bits each */
};
383

    
384
/* Memory image used by the FXSAVE/FXRSTOR instructions (x87 + SSE
   save area).  Field order matches the hardware layout. */
struct fpxstate {
    uint16_t fpuc;        /* x87 control word */
    uint16_t fpus;        /* x87 status word */
    uint16_t fptag;       /* abridged tag word, 1 bit per register */
    uint16_t fop;         /* last opcode */
    uint32_t fpuip;       /* last instruction pointer */
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;       /* last data pointer */
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;       /* SSE control/status */
    uint32_t mxcsr_mask;  /* writable MXCSR bits supported by the CPU */
    uint8_t fpregs1[8 * 16];   /* ST/MMX registers, 16-byte stride */
    uint8_t xmm_regs[16 * 16]; /* XMM registers */
    uint8_t dummy2[96];
};

/* Scratch save area; FXSAVE/FXRSTOR require 16-byte alignment. */
static struct fpxstate fpx1 __attribute__((aligned(16)));
403

    
404
/* Load the emulated x87 state into the host FPU with FRSTOR (for hosts
   without FXSR).  Builds an FSAVE image from env, rotating the register
   file so that env's logical ST(0) lands at the hardware TOP. */
static void restore_native_fp_frstor(CPUState *env)
{
    int fptag, i, j;
    struct fpstate fp1, *fp = &fp1;

    fp->fpuc = env->fpuc;
    /* merge the saved TOP field (bits 13..11) into the status word */
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i=7; i>=0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;  /* empty register */
        } else {
            /* the FPU automatically computes it */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    /* BUG FIX: FRSTOR *reads* its memory operand, so it must be an
       input ("m"), not an output ("=m"); with "=m" the compiler is
       free to consider the stores above dead and discard them. */
    asm volatile ("frstor %0" : : "m" (*fp));
}
428

    
429
/* Capture the host FPU state with FSAVE (for hosts without FXSR) and
   unpack it into env, undoing the TOP-relative register rotation.
   FSAVE also reinitializes the FPU, so the default rounding state is
   reloaded at the end. */
static void save_native_fp_fsave(CPUState *env)
{
    int fptag, i, j;
    uint16_t fpuc;
    struct fpstate fp1, *fp = &fp1;

    /* BUG FIX: FSAVE *writes* its memory operand, so it must be an
       output ("=m"), not an input ("m"); otherwise the compiler may
       assume *fp is unchanged and misoptimize the reads below. */
    asm volatile ("fsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;   /* extract TOP */
    env->fpus = fp->fpus & ~0x3800;      /* status word without TOP */
    fptag = fp->fptag;
    for(i = 0;i < 8; i++) {
        /* tag value 3 means "empty" */
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
453

    
454
/* Load the emulated x87/SSE state into the host FPU with FXRSTOR.
   Builds an FXSAVE image (abridged 1-bit tag word, 16-byte register
   stride) from env; the SSE part is copied only when the guest
   advertises SSE. */
static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    /* merge the saved TOP field (bits 13..11) into the status word */
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for(i = 0; i < 8; i++)
        fptag |= (env->fptags[i] << i);
    /* FXSAVE tag convention is inverted: 1 = valid, 0 = empty */
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
    /* BUG FIX: FXRSTOR *reads* its memory operand, so it must be an
       input ("m"), not an output ("=m"); with "=m" the compiler may
       discard the stores above as dead. */
    asm volatile ("fxrstor %0" : : "m" (*fp));
}
479

    
480
/* Capture the host FPU/SSE state with FXSAVE and unpack it into env.
   The FPU is then reinitialized and the default rounding state is
   restored so the emulator resumes from a known configuration. */
static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

    /* BUG FIX: FXSAVE *writes* its memory operand, so it must be an
       output ("=m"), not an input ("m"); otherwise the compiler may
       assume *fp is unchanged and misoptimize the reads below. */
    asm volatile ("fxsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;   /* extract TOP */
    env->fpus = fp->fpus & ~0x3800;      /* status word without TOP */
    /* FXSAVE tag convention is inverted: 1 = valid, 0 = empty */
    fptag = fp->fptag ^ 0xff;
    for(i = 0;i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    asm volatile ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
509

    
510
/* Emulate the SYSCALL instruction after kqemu returned
   KQEMU_RET_SYSCALL: load kernel CS/SS from MSR_STAR, save the return
   address, mask flags and jump to the configured entry point.
   Returns 2 so the caller resumes via the software emulator. */
static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    int selector;

    /* STAR[47:32] holds the kernel CS selector; SS is CS + 8 */
    selector = (env->star >> 32) & 0xffff;
#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int code64;

        /* long mode: RCX <- return RIP, R11 <- RFLAGS */
        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        /* clear the RFLAGS bits selected by MSR_SYSCALL_MASK */
        env->eflags &= ~env->fmask;
        if (code64)
            env->eip = env->lstar;  /* 64-bit caller: MSR_LSTAR entry */
        else
            env->eip = env->cstar;  /* compat-mode caller: MSR_CSTAR */
    } else
#endif
    {
        /* legacy mode: ECX <- return EIP */
        env->regs[R_ECX] = (uint32_t)kenv->next_eip;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                           0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
        env->eip = (uint32_t)env->star;  /* entry point in STAR[31:0] */
    }
    return 2;
}
562

    
563
#ifdef CONFIG_PROFILER
564

    
565
/* Profiler bookkeeping: count how often kqemu bails out to the
   software MMU at each guest PC. */
#define PC_REC_SIZE 1          /* PC bucket granularity in bytes */
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* one hit counter per distinct guest PC, chained per hash bucket */
typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;
577

    
578
static void kqemu_record_pc(unsigned long pc)
579
{
580
    unsigned long h;
581
    PCRecord **pr, *r;
582

    
583
    h = pc / PC_REC_SIZE;
584
    h = h ^ (h >> PC_REC_HASH_BITS);
585
    h &= (PC_REC_HASH_SIZE - 1);
586
    pr = &pc_rec_hash[h];
587
    for(;;) {
588
        r = *pr;
589
        if (r == NULL)
590
            break;
591
        if (r->pc == pc) {
592
            r->count++;
593
            return;
594
        }
595
        pr = &r->next;
596
    }
597
    r = malloc(sizeof(PCRecord));
598
    r->count = 1;
599
    r->pc = pc;
600
    r->next = NULL;
601
    *pr = r;
602
    nb_pc_records++;
603
}
604

    
605
static int pc_rec_cmp(const void *p1, const void *p2)
606
{
607
    PCRecord *r1 = *(PCRecord **)p1;
608
    PCRecord *r2 = *(PCRecord **)p2;
609
    if (r1->count < r2->count)
610
        return 1;
611
    else if (r1->count == r2->count)
612
        return 0;
613
    else
614
        return -1;
615
}
616

    
617
static void kqemu_record_flush(void)
618
{
619
    PCRecord *r, *r_next;
620
    int h;
621

    
622
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
623
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
624
            r_next = r->next;
625
            free(r);
626
        }
627
        pc_rec_hash[h] = NULL;
628
    }
629
    nb_pc_records = 0;
630
}
631

    
632
void kqemu_record_dump(void)
633
{
634
    PCRecord **pr, *r;
635
    int i, h;
636
    FILE *f;
637
    int64_t total, sum;
638

    
639
    pr = malloc(sizeof(PCRecord *) * nb_pc_records);
640
    i = 0;
641
    total = 0;
642
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
643
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
644
            pr[i++] = r;
645
            total += r->count;
646
        }
647
    }
648
    qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
649

    
650
    f = fopen("/tmp/kqemu.stats", "w");
651
    if (!f) {
652
        perror("/tmp/kqemu.stats");
653
        exit(1);
654
    }
655
    fprintf(f, "total: %" PRId64 "\n", total);
656
    sum = 0;
657
    for(i = 0; i < nb_pc_records; i++) {
658
        r = pr[i];
659
        sum += r->count;
660
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
661
                r->pc,
662
                r->count,
663
                (double)r->count / (double)total * 100.0,
664
                (double)sum / (double)total * 100.0);
665
    }
666
    fclose(f);
667
    free(pr);
668

    
669
    kqemu_record_flush();
670
}
671
#endif
672

    
673
static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
674
                                  const SegmentCache *sc)
675
{
676
    ksc->selector = sc->selector;
677
    ksc->flags = sc->flags;
678
    ksc->limit = sc->limit;
679
    ksc->base = sc->base;
680
}
681

    
682
/* Copy one kqemu segment-cache entry back into QEMU's format. */
static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->base = ksc->base;
    sc->limit = ksc->limit;
    sc->flags = ksc->flags;
    sc->selector = ksc->selector;
}
690

    
691
/* Run the guest vCPU inside the kqemu kernel module.
   Marshals the full CPU state into a kqemu_cpu_state, issues the
   KQEMU_EXEC ioctl, then unmarshals the state back and reacts to the
   module's return code.
   Returns: do_syscall()'s result (2) for SYSCALL, 1 when an
   interrupt/exception must be delivered by the emulator, 0 when
   interrupted by a signal, 2 to fall back to the software MMU. */
int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, cpl, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    LOG_INT("kqemu: cpu_exec: enter\n");
    LOG_INT_STATE(env);
    /* ---- marshal CPU state into the kqemu structure ---- */
    for(i = 0; i < CPU_NB_REGS; i++)
        kenv->regs[i] = env->regs[i];
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for(i = 0; i < 6; i++)
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    /* pass debug registers only if some breakpoint is armed in DR7 */
    if (env->dr[7] & 0xff) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    cpl = (env->hflags & HF_CPL_MASK);
    kenv->cpl = cpl;
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    kqemu_reset_modified_ram_pages();

    /* load the emulated FPU state into the host FPU */
    if (env->cpuid_features & CPUID_FXSR)
        restore_native_fp_fxrstor(env);
    else
        restore_native_fp_frstor(env);

    /* ---- execute inside the kernel module ---- */
#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif
    /* save the host FPU state back into the emulated CPU */
    if (env->cpuid_features & CPUID_FXSR)
        save_native_fp_fxsave(env);
    else
        save_native_fp_fsave(env);

    /* ---- unmarshal CPU state back from the kqemu structure ---- */
    for(i = 0; i < CPU_NB_REGS; i++)
        env->regs[i] = kenv->regs[i];
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for(i = 0; i < 6; i++)
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for(i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    /* resynchronize the dirty bitmap if the module touched it */
    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    /* invalidate translated blocks for pages the guest modified */
    if (kenv->nb_modified_ram_pages > 0) {
        for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
            unsigned long addr;
            addr = modified_ram_pages[i];
            tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
        }
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that is real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK)
        env->hflags |= HF_OSFXSR_MASK;
    else
        env->hflags &= ~HF_OSFXSR_MASK;

    /* ---- dispatch on the module's return code ---- */
    LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    } else
    if ((ret & 0xff00) == KQEMU_RET_INT) {
        /* software interrupt: deliver vector (low byte) via emulator */
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
        LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
        LOG_INT_STATE(env);
        return 1;
    } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        /* CPU exception with error code: deliver via emulator */
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
        LOG_INT("kqemu: exception v=%02x e=%04x:\n",
                    env->exception_index, env->error_code);
        LOG_INT_STATE(env);
        return 1;
    } else if (ret == KQEMU_RET_INTR) {
        /* interrupted by a host signal: let the main loop handle it */
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
        LOG_INT_STATE(env);
        return 0;
    } else if (ret == KQEMU_RET_SOFTMMU) {
        /* the module cannot handle this code: fall back to emulation */
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
        LOG_INT_STATE(env);
        return 2;
    } else {
        cpu_dump_state(env, stderr, fprintf, 0);
        fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
        exit(1);
    }
    return 0;
}
915

    
916
/* Kick a vCPU that is currently executing inside the kqemu kernel
   module.  Only Win32 needs an explicit action here; on other hosts
   this function is a no-op. */
void kqemu_cpu_interrupt(CPUState *env)
{
#if defined(_WIN32)
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and successfully returning. */
    CancelIo(kqemu_fd);
#endif
}
924

    
925
/* 
926
   QEMU paravirtualization interface. The current interface only
927
   allows to modify the IF and IOPL flags when running in
928
   kqemu.
929

930
   At this point it is not very satisfactory. I leave it for reference
931
   as it adds little complexity.
932
*/
933

    
934
#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
935

    
936
/* Byte and word accesses to the QPI communication page carry no
   information: reads return 0 and writes are ignored.  Only the
   32-bit handlers below implement the interface. */
static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
}

static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
}
953

    
954
/* 32-bit read from the QPI page: expose the current vCPU's IF and
   IOPL flag bits; 0 when no vCPU is active. */
static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *env = cpu_single_env;

    if (env == NULL)
        return 0;
    return env->eflags & (IF_MASK | IOPL_MASK);
}
963

    
964
/* Note: after writing to this address, the guest code must make sure
965
   it is exiting the current TB. pushf/popf can be used for that
966
   purpose. */
967
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
968
{
969
    CPUState *env;
970

    
971
    env = cpu_single_env;
972
    if (!env)
973
        return;
974
    env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) | 
975
        (val & (IF_MASK | IOPL_MASK));
976
}
977

    
978
/* Access dispatch tables for the QPI communication page, indexed by
   access size (byte, word, long); only the long handlers carry data. */
static CPUReadMemoryFunc *qpi_mem_read[3] = {
    qpi_mem_readb,
    qpi_mem_readw,
    qpi_mem_readl,
};

static CPUWriteMemoryFunc *qpi_mem_write[3] = {
    qpi_mem_writeb,
    qpi_mem_writew,
    qpi_mem_writel,
};
989

    
990
static void qpi_init(void)
991
{
992
    kqemu_comm_base = 0xff000000 | 1;
993
    qpi_io_memory = cpu_register_io_memory(
994
                                           qpi_mem_read, 
995
                                           qpi_mem_write, NULL);
996
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff, 
997
                                 0x1000, qpi_io_memory);
998
}
999
#endif