Statistics
| Branch: | Revision:

root / kqemu.c @ 604457d7

History | View | Annotate | Download (27.8 kB)

1
/*
2
 *  KQEMU support
3
 *
4
 *  Copyright (c) 2005-2008 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
19
 */
20
#include "config.h"
21
#ifdef _WIN32
22
#include <windows.h>
23
#include <winioctl.h>
24
#else
25
#include <sys/types.h>
26
#include <sys/mman.h>
27
#include <sys/ioctl.h>
28
#endif
29
#ifdef HOST_SOLARIS
30
#include <sys/ioccom.h>
31
#endif
32
#include <stdlib.h>
33
#include <stdio.h>
34
#include <stdarg.h>
35
#include <string.h>
36
#include <errno.h>
37
#include <unistd.h>
38
#include <inttypes.h>
39

    
40
#include "cpu.h"
41
#include "exec-all.h"
42
#include "qemu-common.h"
43

    
44
#ifdef USE_KQEMU
45

    
46
#define DEBUG
47
//#define PROFILE
48

    
49

    
50
#ifdef DEBUG
51
#  define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
52
#  define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
53
#else
54
#  define LOG_INT(...) do { } while (0)
55
#  define LOG_INT_STATE(env) do { } while (0)
56
#endif
57

    
58
#include <unistd.h>
59
#include <fcntl.h>
60
#include "kqemu.h"
61

    
62
#ifdef _WIN32
63
#define KQEMU_DEVICE "\\\\.\\kqemu"
64
#else
65
#define KQEMU_DEVICE "/dev/kqemu"
66
#endif
67

    
68
static void qpi_init(void);
69

    
70
/* Handle on the kqemu device. It holds KQEMU_INVALID_FD until kqemu_init()
   opens the device, and again after a failed initialisation. */
#ifdef _WIN32
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
#define kqemu_closefd(x) CloseHandle(x)
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#else
#define KQEMU_INVALID_FD -1
#define kqemu_closefd(x) close(x)
int kqemu_fd = KQEMU_INVALID_FD;
#endif
79

    
80
/* Requested acceleration mode:
   0 = not allowed
   1 = user kqemu
   2 = kernel kqemu
*/
int kqemu_allowed = 1;

/* Page lists handed to the kernel module through struct kqemu_init (see
   kqemu_init()), each paired with the number of valid entries. */
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;

/* One byte per RAM page; non-zero while the page is already queued in
   modified_ram_pages (see kqemu_modify_page()). */
uint8_t *modified_ram_pages_table;

/* I/O memory index returned by cpu_register_io_memory() for the QPI page. */
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
94

    
95
/* Run the CPUID instruction for leaf 'index' and store the four result
   registers into the lvalues eax, ebx, ecx and edx. Spelled with the
   double-underscore keywords so that it also builds in strict ISO modes. */
#define cpuid(index, eax, ebx, ecx, edx)                     \
    __asm__ __volatile__ ("cpuid"                            \
                          : "=a" (eax), "=b" (ebx),          \
                            "=c" (ecx), "=d" (edx)           \
                          : "0" (index))
99

    
100
#ifdef __x86_64__
/* 64-bit hosts: always report CPUID as available. */
static int is_cpuid_supported(void)
{
    return 1;
}
#else
/* 32-bit hosts: try to flip bit 21 (0x00200000, the ID flag) of EFLAGS and
   read EFLAGS back. Returns non-zero when the value read back differs from
   the original one, i.e. when the bit could be toggled. */
static int is_cpuid_supported(void)
{
    int flags_after, flags_before;

    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (flags_after), "=d" (flags_before)
                  :
                  : "cc");
    return flags_after != flags_before;
}
#endif
123

    
124
/* Overwrite a selected set of guest CPUID feature bits in 'env' with the
   values reported by the host CPU (or with zero when the host has no
   CPUID). Bits outside the two masks below are left untouched. */
static void kqemu_update_cpuid(CPUState *env)
{
    /* the following features are kept identical on the host and
       target cpus because they are important for user code. Strictly
       speaking, only SSE really matters because the OS must support
       it if the user code uses it. */
    const int critical_features_mask =
        CPUID_CMOV | CPUID_CX8 |
        CPUID_FXSR | CPUID_MMX | CPUID_SSE |
        CPUID_SSE2 | CPUID_SEP;
    const int ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
    int features = 0;
    int ext_features = 0;
    uint32_t eax, ebx, ecx, edx;

    if (is_cpuid_supported()) {
        /* leaf 1: EDX and ECX carry the feature words used here */
        cpuid(1, eax, ebx, ecx, edx);
        features = edx;
        ext_features = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
       compatibility mode, so in order to have the best performances
       it is better not to use it */
    features &= ~CPUID_SEP;
#endif
    env->cpuid_features &= ~critical_features_mask;
    env->cpuid_features |= features & critical_features_mask;
    env->cpuid_ext_features &= ~ext_features_mask;
    env->cpuid_ext_features |= ext_features & ext_features_mask;
    /* XXX: we could update more of the target CPUID state so that the
       non accelerated code sees exactly the same CPU features as the
       accelerated code */
}
160

    
161
/* Open the kqemu device, check the module version, allocate the page lists
   shared with the module and issue KQEMU_INIT.
   Returns 0 on success. Returns -1 when kqemu is not allowed, when the
   device cannot be opened, or when any later step fails; in the latter case
   the device is closed again and kqemu_fd is reset to KQEMU_INVALID_FD.
   NOTE(review): buffers allocated before a failure are not released on the
   'fail' path - confirm whether that is intended. */
int kqemu_init(CPUState *env)
{
    struct kqemu_init kinit;
    int ret, version;
#ifdef _WIN32
    DWORD temp;
#endif

    if (!kqemu_allowed)
        return -1;

    /* open the device */
#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
                KQEMU_DEVICE, GetLastError());
        return -1;
    }
#else
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
        return -1;
    }
#endif

    /* version stays 0 if the query fails, which is then reported as a
       mismatch below */
    version = 0;
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
#endif
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

    /* page lists shared with the module */
    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
                                  sizeof(uint64_t));
    if (!pages_to_flush)
        goto fail;

    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
                                       sizeof(uint64_t));
    if (!ram_pages_to_update)
        goto fail;

    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
                                      sizeof(uint64_t));
    if (!modified_ram_pages)
        goto fail;

    /* one byte per RAM page */
    modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
    if (!modified_ram_pages_table)
        goto fail;

    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    kinit.ram_base = phys_ram_base;
    kinit.ram_size = phys_ram_size;
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
#endif
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
        goto fail;
    }

    kqemu_update_cpuid(env);
    env->kqemu_enabled = kqemu_allowed;
    nb_pages_to_flush = 0;
    nb_ram_pages_to_update = 0;

    qpi_init();
    return 0;

fail:
    kqemu_closefd(kqemu_fd);
    kqemu_fd = KQEMU_INVALID_FD;
    return -1;
}
249

    
250
/* Queue 'addr' in pages_to_flush. Once the list already holds
   KQEMU_MAX_PAGES_TO_FLUSH entries, the counter is switched to
   KQEMU_FLUSH_ALL instead of storing the address. */
void kqemu_flush_page(CPUState *env, target_ulong addr)
{
    LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    if (nb_pages_to_flush < KQEMU_MAX_PAGES_TO_FLUSH) {
        pages_to_flush[nb_pages_to_flush++] = addr;
    } else {
        nb_pages_to_flush = KQEMU_FLUSH_ALL;
    }
}
258

    
259
/* Request a complete flush by setting the pending counter to
   KQEMU_FLUSH_ALL. Neither 'env' nor 'global' is consulted. */
void kqemu_flush(CPUState *env, int global)
{
    LOG_INT("kqemu_flush:\n");
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
264

    
265
/* Queue 'ram_addr' in ram_pages_to_update, but only when the dirty byte of
   its page currently reads 0xff. When the list is full the counter is
   switched to KQEMU_RAM_PAGES_UPDATE_ALL instead. */
void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
    LOG_INT("kqemu_set_notdirty: addr=%08lx\n",
                (unsigned long)ram_addr);
    /* we only track transitions to dirty state */
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] == 0xff) {
        if (nb_ram_pages_to_update < KQEMU_MAX_RAM_PAGES_TO_UPDATE) {
            ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
        } else {
            nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
        }
    }
}
277

    
278
static void kqemu_reset_modified_ram_pages(void)
279
{
280
    int i;
281
    unsigned long page_index;
282

    
283
    for(i = 0; i < nb_modified_ram_pages; i++) {
284
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
285
        modified_ram_pages_table[page_index] = 0;
286
    }
287
    nb_modified_ram_pages = 0;
288
}
289

    
290
/* Record that the RAM page containing 'ram_addr' was modified. A page is
   queued at most once between two resets; when the queue reaches
   KQEMU_MAX_MODIFIED_RAM_PAGES entries it is pushed to the module with
   KQEMU_MODIFY_RAM_PAGES and then reset.
   NOTE(review): the result of that request is stored but never examined. */
void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index = ram_addr >> TARGET_PAGE_BITS;
    int ret;
#ifdef _WIN32
    DWORD temp;
#endif

    /* already queued: nothing to do */
    if (modified_ram_pages_table[page_index])
        return;

    modified_ram_pages_table[page_index] = 1;
    modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
    if (nb_modified_ram_pages < KQEMU_MAX_MODIFIED_RAM_PAGES)
        return;

    /* flush */
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                          &nb_modified_ram_pages,
                          sizeof(nb_modified_ram_pages),
                          NULL, 0, &temp, NULL);
#else
    ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                &nb_modified_ram_pages);
#endif
    (void)ret;
    kqemu_reset_modified_ram_pages();
}
320

    
321
/* Describe a guest physical memory range to the kqemu module.
   The range [start_addr, start_addr + size) is widened to whole target
   pages. The low bits of 'phys_offset' (below the page mask) select the
   memory type, and the page-aligned part is passed on as the RAM address.
   Types other than RAM, ROM and the QPI page are reported as unassigned.
   A failing request is only reported on stderr. */
void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
    uint64_t end;
    int ret, io_index;

    /* round the end up and the start down to page boundaries */
    end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;
    kphys_mem->phys_addr = start_addr;
    kphys_mem->size = end - start_addr;
    kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
    io_index = phys_offset & ~TARGET_PAGE_MASK;
    switch(io_index) {
    case IO_MEM_RAM:
        kphys_mem->io_index = KQEMU_IO_MEM_RAM;
        break;
    case IO_MEM_ROM:
        kphys_mem->io_index = KQEMU_IO_MEM_ROM;
        break;
    default:
        if (qpi_io_memory == io_index) {
            kphys_mem->io_index = KQEMU_IO_MEM_COMM;
        } else {
            kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
        }
        break;
    }
#ifdef _WIN32
    {
        DWORD temp;
        ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
                              kphys_mem, sizeof(*kphys_mem),
                              NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    }
#else
    ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
#endif
    if (ret < 0) {
        /* name the request that was actually issued; start_addr is the
           page-aligned value sent to the module */
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_MEM error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                ret, start_addr,
                (unsigned long)size, (unsigned long)phys_offset);
    }
}
365

    
366
/* Memory image used with the fsave/frstor instructions below
   (108 bytes: 16-bit control/status/tag words each followed by 16 bits of
   padding, four 32-bit pointer words, then eight 10-byte registers). */
struct fpstate {
    uint16_t fpuc;              /* control word */
    uint16_t dummy1;
    uint16_t fpus;              /* status word */
    uint16_t dummy2;
    uint16_t fptag;             /* tag word, two bits per register */
    uint16_t dummy3;

    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];    /* registers, 10 bytes each */
};

/* Memory image used with the fxsave/fxrstor instructions below
   (512 bytes). */
struct fpxstate {
    uint16_t fpuc;              /* control word */
    uint16_t fpus;              /* status word */
    uint16_t fptag;             /* tag bits, one bit per register */
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];    /* registers, one per 16-byte slot */
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];
};

/* Shared, 16-byte aligned scratch image for the fxsave/fxrstor helpers. */
static struct fpxstate fpx1 __attribute__((aligned(16)));
400

    
401
/* Load the host FPU from the guest FPU state in 'env' using frstor.
   The image is built in a local struct fpstate: control word, status word
   with the top-of-stack index merged into bits 11-13, tag word, and the
   eight registers copied in stack order starting at env->fpstt. */
static void restore_native_fp_frstor(CPUState *env)
{
    int fptag, i, j;
    struct fpstate fp1, *fp = &fp1;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            /* flagged register: tag value 3 */
            fptag |= 3;
        } else {
            /* the FPU automatically computes it */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    /* frstor READS the image, so it must be an input operand. Declaring it
       as an output ("=m") would let the compiler treat the stores above as
       dead. */
    asm volatile ("frstor %0" : : "m" (*fp));
}
425

    
426
/* Store the host FPU state into 'env' using fsave, then reload a control
   word made of 0x037f plus the rounding-control bits (bits 10-11) of the
   saved control word. */
static void save_native_fp_fsave(CPUState *env)
{
    int fptag, i, j;
    uint16_t fpuc;
    struct fpstate fp1, *fp = &fp1;

    /* fsave WRITES the image, so it must be an output operand. As an input
       ("m") the compiler would not know that fp1 is defined by the asm. */
    asm volatile ("fsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag;
    for (i = 0; i < 8; i++) {
        /* only tag value 3 sets the flag in env */
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
450

    
451
/* Load the host FPU (and, when the guest CPUID advertises SSE, MXCSR and
   the XMM registers) from 'env' using fxrstor. The image is built in the
   static, 16-byte aligned fpx1 buffer; fields not assigned here keep
   whatever they held before. */
static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
    /* one tag bit per register, stored inverted relative to env->fptags */
    fptag = 0;
    for (i = 0; i < 8; i++)
        fptag |= (env->fptags[i] << i);
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
    /* fxrstor READS the image, so it must be an input operand, not an
       output ("=m"). */
    asm volatile ("fxrstor %0" : : "m" (*fp));
}
476

    
477
/* Store the host FPU state (and, when the guest CPUID advertises SSE,
   MXCSR and the XMM registers) into 'env' using fxsave on the static fpx1
   buffer. Afterwards the FPU is reinitialised with fninit and a control
   word made of 0x037f plus the saved rounding-control bits is loaded. */
static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

    /* fxsave WRITES the image, so it must be an output operand, not an
       input ("m"). */
    asm volatile ("fxsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    /* one tag bit per register, inverted relative to env->fptags */
    fptag = fp->fptag ^ 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    asm volatile ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
506

    
507
/* Emulate the guest 'syscall' instruction after kqemu returned
   KQEMU_RET_SYSCALL: save the return address, switch to CPL 0, load flat
   CS/SS descriptors derived from bits 32-47 of STAR and jump to the entry
   point. Always returns 2. */
static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    const int sel = (env->star >> 32) & 0xffff;
    const int cs_sel = sel & 0xfffc;
    const int ss_sel = (sel + 8) & 0xfffc;

#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int code64;

        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        /* sampled before CS is reloaded below */
        code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, cs_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, ss_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~env->fmask;
        /* 64-bit callers enter at LSTAR, other callers at CSTAR */
        env->eip = code64 ? env->lstar : env->cstar;
    } else
#endif
    {
        env->regs[R_ECX] = (uint32_t)kenv->next_eip;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, cs_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
        cpu_x86_load_seg_cache(env, R_SS, ss_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
        /* entry point is the low 32 bits of STAR */
        env->eip = (uint32_t)env->star;
    }
    return 2;
}
559

    
560
#ifdef CONFIG_PROFILER
561

    
562
#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* One profiled program counter and the number of times it was recorded;
   records sharing a hash bucket are chained through 'next'. */
typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;

/* Count one occurrence of 'pc': bump the counter of its existing record, or
   append a new record (count 1) to the end of its hash chain. If no memory
   is available for a new record, the sample is dropped. */
static void kqemu_record_pc(unsigned long pc)
{
    unsigned long h;
    PCRecord **pr, *r;

    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);
    for (pr = &pc_rec_hash[h]; (r = *pr) != NULL; pr = &r->next) {
        if (r->pc == pc) {
            r->count++;
            return;
        }
    }
    /* not seen yet: pr now points at the chain's terminating link */
    r = malloc(sizeof *r);
    if (r == NULL)
        return;
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}
601

    
602
static int pc_rec_cmp(const void *p1, const void *p2)
603
{
604
    PCRecord *r1 = *(PCRecord **)p1;
605
    PCRecord *r2 = *(PCRecord **)p2;
606
    if (r1->count < r2->count)
607
        return 1;
608
    else if (r1->count == r2->count)
609
        return 0;
610
    else
611
        return -1;
612
}
613

    
614
static void kqemu_record_flush(void)
615
{
616
    PCRecord *r, *r_next;
617
    int h;
618

    
619
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
620
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
621
            r_next = r->next;
622
            free(r);
623
        }
624
        pc_rec_hash[h] = NULL;
625
    }
626
    nb_pc_records = 0;
627
}
628

    
629
void kqemu_record_dump(void)
630
{
631
    PCRecord **pr, *r;
632
    int i, h;
633
    FILE *f;
634
    int64_t total, sum;
635

    
636
    pr = malloc(sizeof(PCRecord *) * nb_pc_records);
637
    i = 0;
638
    total = 0;
639
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
640
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
641
            pr[i++] = r;
642
            total += r->count;
643
        }
644
    }
645
    qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
646

    
647
    f = fopen("/tmp/kqemu.stats", "w");
648
    if (!f) {
649
        perror("/tmp/kqemu.stats");
650
        exit(1);
651
    }
652
    fprintf(f, "total: %" PRId64 "\n", total);
653
    sum = 0;
654
    for(i = 0; i < nb_pc_records; i++) {
655
        r = pr[i];
656
        sum += r->count;
657
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
658
                r->pc,
659
                r->count,
660
                (double)r->count / (double)total * 100.0,
661
                (double)sum / (double)total * 100.0);
662
    }
663
    fclose(f);
664
    free(pr);
665

    
666
    kqemu_record_flush();
667
}
668
#endif
669

    
670
static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
671
                                  const SegmentCache *sc)
672
{
673
    ksc->selector = sc->selector;
674
    ksc->flags = sc->flags;
675
    ksc->limit = sc->limit;
676
    ksc->base = sc->base;
677
}
678

    
679
/* Copy a kqemu segment cache entry back into its QEMU counterpart. */
static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->base = ksc->base;
    sc->limit = ksc->limit;
    sc->flags = ksc->flags;
    sc->selector = ksc->selector;
}
687

    
688
/* Run guest code through the kqemu module.
   The guest state in 'env' is exported into a struct kqemu_cpu_state, the
   host FPU is loaded with the guest FPU state, KQEMU_EXEC is issued, and
   the resulting state is imported back (registers, segments, control
   registers, FPU, hidden flags). Pending TLB flushes, dirty updates and
   modified-page invalidations reported by the module are then applied.
   Return value, derived from the module's retval:
     - result of do_syscall() for KQEMU_RET_SYSCALL,
     - 1 for a software interrupt or an exception (details left in env),
     - 0 for KQEMU_RET_INTR,
     - 2 for KQEMU_RET_SOFTMMU.
   Any other value dumps the CPU state and exits the process. */
int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, cpl, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    LOG_INT("kqemu: cpu_exec: enter\n");
    LOG_INT_STATE(env);

    /* --- export the guest CPU state --- */
    for (i = 0; i < CPU_NB_REGS; i++) {
        kenv->regs[i] = env->regs[i];
    }
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for (i = 0; i < 6; i++) {
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    }
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    /* dr0-dr3 are only exported when the low byte of dr7 is non-zero */
    if (env->dr[7] & 0xff) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    cpl = (env->hflags & HF_CPL_MASK);
    kenv->cpl = cpl;

    /* --- hand over the pending page lists --- */
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    kqemu_reset_modified_ram_pages();

    /* --- guest FPU state into the host FPU --- */
    if (env->cpuid_features & CPUID_FXSR) {
        restore_native_fp_fxrstor(env);
    } else {
        restore_native_fp_frstor(env);
    }

    /* --- execute --- */
#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    /* NOTE(review): the ioctl result itself is not checked here, unlike
       the Windows path above - confirm this is intended. */
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif

    /* --- host FPU back into the guest state --- */
    if (env->cpuid_features & CPUID_FXSR) {
        save_native_fp_fxsave(env);
    } else {
        save_native_fp_fsave(env);
    }

    /* --- import the resulting CPU state --- */
    for (i = 0; i < CPU_NB_REGS; i++) {
        env->regs[i] = kenv->regs[i];
    }
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for (i = 0; i < 6; i++) {
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    }
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for (i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    /* invalidate translated code on every page reported as modified */
    for (i = 0; i < kenv->nb_modified_ram_pages; i++) {
        unsigned long addr = modified_ram_pages[i];

        tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that is real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        env->hflags |= HF_OSFXSR_MASK;
    } else {
        env->hflags &= ~HF_OSFXSR_MASK;
    }

    /* --- dispatch on the module's return value --- */
    LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    }
    if ((ret & 0xff00) == KQEMU_RET_INT) {
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
        LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
        LOG_INT_STATE(env);
        return 1;
    }
    if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
        LOG_INT("kqemu: exception v=%02x e=%04x:\n",
                    env->exception_index, env->error_code);
        LOG_INT_STATE(env);
        return 1;
    }
    if (ret == KQEMU_RET_INTR) {
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
        LOG_INT_STATE(env);
        return 0;
    }
    if (ret == KQEMU_RET_SOFTMMU) {
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
        LOG_INT_STATE(env);
        return 2;
    }

    /* anything else is fatal */
    cpu_dump_state(env, stderr, fprintf, 0);
    fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
    exit(1);
    return 0;
}
912

    
913
/* Interrupt a running kqemu execution. Only does something on Windows
   hosts; elsewhere the function body is empty. */
void kqemu_cpu_interrupt(CPUState *env)
{
#ifdef _WIN32
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and successfully returning. */
    CancelIo(kqemu_fd);
#endif
}
921

    
922
/*
   QEMU paravirtualization interface. The current interface only
   allows the IF and IOPL flags to be modified when running in
   kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/
930

    
931
#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
932

    
933
/* Byte and word accesses to the QPI page are not implemented:
   reads return 0 and writes are ignored. */

static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    /* ignored */
}

static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    /* ignored */
}
950

    
951
/* 32-bit read of the QPI page: returns the IF and IOPL bits of the current
   CPU's eflags, or 0 when there is no current CPU. */
static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *env = cpu_single_env;

    if (env == NULL) {
        return 0;
    }
    return env->eflags & (IF_MASK | IOPL_MASK);
}
960

    
961
/* Note: after writing to this address, the guest code must make sure
962
   it is exiting the current TB. pushf/popf can be used for that
963
   purpose. */
964
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
965
{
966
    CPUState *env;
967

    
968
    env = cpu_single_env;
969
    if (!env)
970
        return;
971
    env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) | 
972
        (val & (IF_MASK | IOPL_MASK));
973
}
974

    
975
static CPUReadMemoryFunc *qpi_mem_read[3] = {
976
    qpi_mem_readb,
977
    qpi_mem_readw,
978
    qpi_mem_readl,
979
};
980

    
981
static CPUWriteMemoryFunc *qpi_mem_write[3] = {
982
    qpi_mem_writeb,
983
    qpi_mem_writew,
984
    qpi_mem_writel,
985
};
986

    
987
static void qpi_init(void)
988
{
989
    kqemu_comm_base = 0xff000000 | 1;
990
    qpi_io_memory = cpu_register_io_memory(0, 
991
                                           qpi_mem_read, 
992
                                           qpi_mem_write, NULL);
993
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff, 
994
                                 0x1000, qpi_io_memory);
995
}
996
#endif