root / kqemu.c @ 46dc3881

/*
 *  KQEMU support
 *
 *  Copyright (c) 2005-2008 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include "config.h"
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <winioctl.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#endif
#ifdef HOST_SOLARIS
#include <sys/ioccom.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <inttypes.h>

#include "cpu.h"
#include "exec-all.h"
#include "qemu-common.h"

#ifdef USE_KQEMU

#define DEBUG
//#define PROFILE

#include <unistd.h>
#include <fcntl.h>
#include "kqemu.h"

#ifdef _WIN32
#define KQEMU_DEVICE "\\\\.\\kqemu"
#else
#define KQEMU_DEVICE "/dev/kqemu"
#endif

static void qpi_init(void);

#ifdef _WIN32
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) CloseHandle(x)
#else
#define KQEMU_INVALID_FD -1
int kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) close(x)
#endif

/* 0 = not allowed
   1 = user kqemu
   2 = kernel kqemu
*/
int kqemu_allowed = 1;
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;
uint8_t *modified_ram_pages_table;
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */

#define cpuid(index, eax, ebx, ecx, edx) \
  asm volatile ("cpuid" \
                : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
                : "0" (index))

#ifdef __x86_64__
static int is_cpuid_supported(void)
{
    return 1;
}
#else
static int is_cpuid_supported(void)
{
    int v0, v1;
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (v0), "=d" (v1)
                  :
                  : "cc");
    return (v0 != v1);
}
#endif
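
/* The 32-bit variant above uses the classic EFLAGS.ID probe: bit 21 of
   EFLAGS can only be toggled when the CPUID instruction is implemented,
   so flipping it and reading the flags back tells whether cpuid() can be
   used. On x86_64, CPUID is always available. */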

static void kqemu_update_cpuid(CPUState *env)
{
    int critical_features_mask, features, ext_features, ext_features_mask;
    uint32_t eax, ebx, ecx, edx;

    /* the following features are kept identical on the host and
       target cpus because they are important for user code. Strictly
       speaking, only SSE really matters because the OS must support
       it if the user code uses it. */
    critical_features_mask =
        CPUID_CMOV | CPUID_CX8 |
        CPUID_FXSR | CPUID_MMX | CPUID_SSE |
        CPUID_SSE2 | CPUID_SEP;
    ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
    if (!is_cpuid_supported()) {
        features = 0;
        ext_features = 0;
    } else {
        cpuid(1, eax, ebx, ecx, edx);
        features = edx;
        ext_features = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
       compatibility mode, so for the best performance
       it is better not to use it */
    features &= ~CPUID_SEP;
#endif
    env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
        (features & critical_features_mask);
    env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
        (ext_features & ext_features_mask);
    /* XXX: we could update more of the target CPUID state so that the
       non accelerated code sees exactly the same CPU features as the
       accelerated code */
}

int kqemu_init(CPUState *env)
{
    struct kqemu_init kinit;
    int ret, version;
#ifdef _WIN32
    DWORD temp;
#endif

    if (!kqemu_allowed)
        return -1;

#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
#else
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
#endif
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
        return -1;
    }
    version = 0;
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
#endif
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
                                  sizeof(uint64_t));
    if (!pages_to_flush)
        goto fail;

    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
                                       sizeof(uint64_t));
    if (!ram_pages_to_update)
        goto fail;

    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
                                      sizeof(uint64_t));
    if (!modified_ram_pages)
        goto fail;
    modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
    if (!modified_ram_pages_table)
        goto fail;

    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    kinit.ram_base = phys_ram_base;
    kinit.ram_size = phys_ram_size;
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
#endif
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
    fail:
        kqemu_closefd(kqemu_fd);
        kqemu_fd = KQEMU_INVALID_FD;
        return -1;
    }
    kqemu_update_cpuid(env);
    env->kqemu_enabled = kqemu_allowed;
    nb_pages_to_flush = 0;
    nb_ram_pages_to_update = 0;

    qpi_init();
    return 0;
}
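
/* The arrays registered with KQEMU_INIT above (pages_to_flush,
   ram_pages_to_update, modified_ram_pages) are shared with the kernel
   module; the nb_* counters are exchanged on every KQEMU_EXEC round trip
   in kqemu_cpu_exec() below. */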

void kqemu_flush_page(CPUState *env, target_ulong addr)
{
#if defined(DEBUG)
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    }
#endif
    if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
        nb_pages_to_flush = KQEMU_FLUSH_ALL;
    else
        pages_to_flush[nb_pages_to_flush++] = addr;
}

void kqemu_flush(CPUState *env, int global)
{
#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_flush:\n");
    }
#endif
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
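
/* TLB flushes are deferred: the addresses queued here are only handed to
   the kernel module with the next KQEMU_EXEC call, and a queue overflow
   degrades into a full flush (KQEMU_FLUSH_ALL). */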

void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_set_notdirty: addr=%08lx\n",
                (unsigned long)ram_addr);
    }
#endif
    /* we only track transitions to dirty state */
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
        return;
    if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
        nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
    else
        ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
}
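
/* A dirty byte of 0xff means all dirty flags are set for the page, so
   only pages leaving the fully dirty state are queued, presumably so the
   kernel module can re-enable write protection for them. */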

static void kqemu_reset_modified_ram_pages(void)
{
    int i;
    unsigned long page_index;

    for(i = 0; i < nb_modified_ram_pages; i++) {
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
        modified_ram_pages_table[page_index] = 0;
    }
    nb_modified_ram_pages = 0;
}

void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index;
    int ret;
#ifdef _WIN32
    DWORD temp;
#endif

    page_index = ram_addr >> TARGET_PAGE_BITS;
    if (!modified_ram_pages_table[page_index]) {
#if 0
        printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
#endif
        modified_ram_pages_table[page_index] = 1;
        modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
        if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
            /* flush */
#ifdef _WIN32
            ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                                  &nb_modified_ram_pages,
                                  sizeof(nb_modified_ram_pages),
                                  NULL, 0, &temp, NULL);
#else
            ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                        &nb_modified_ram_pages);
#endif
            kqemu_reset_modified_ram_pages();
        }
    }
}
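
/* Modified pages are de-duplicated through modified_ram_pages_table and
   batched: a full batch is pushed immediately via KQEMU_MODIFY_RAM_PAGES,
   otherwise the batch travels with the next KQEMU_EXEC call. */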

void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
    uint64_t end;
    int ret, io_index;

    end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;
    kphys_mem->phys_addr = start_addr;
    kphys_mem->size = end - start_addr;
    kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
    io_index = phys_offset & ~TARGET_PAGE_MASK;
    switch(io_index) {
    case IO_MEM_RAM:
        kphys_mem->io_index = KQEMU_IO_MEM_RAM;
        break;
    case IO_MEM_ROM:
        kphys_mem->io_index = KQEMU_IO_MEM_ROM;
        break;
    default:
        if (qpi_io_memory == io_index) {
            kphys_mem->io_index = KQEMU_IO_MEM_COMM;
        } else {
            kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
        }
        break;
    }
#ifdef _WIN32
    {
        DWORD temp;
        ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
                              kphys_mem, sizeof(*kphys_mem),
                              NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    }
#else
    ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
#endif
    if (ret < 0) {
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_MEM error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                ret, start_addr,
                (unsigned long)size, (unsigned long)phys_offset);
    }
}
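
/* phys_offset packs a page-aligned RAM address in its upper bits and an
   I/O memory index in its low bits, which is what the switch above
   translates into the KQEMU_IO_MEM_* kinds. */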

struct fpstate {
    uint16_t fpuc;
    uint16_t dummy1;
    uint16_t fpus;
    uint16_t dummy2;
    uint16_t fptag;
    uint16_t dummy3;

    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];
};

struct fpxstate {
    uint16_t fpuc;
    uint16_t fpus;
    uint16_t fptag;
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];
};

static struct fpxstate fpx1 __attribute__((aligned(16)));
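
/* struct fpstate matches the fsave/frstor memory image and struct
   fpxstate the 512-byte fxsave/fxrstor image; fxsave and fxrstor require
   a 16-byte aligned operand, hence the aligned(16) attribute on fpx1. */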

static void restore_native_fp_frstor(CPUState *env)
{
    int fptag, i, j;
    struct fpstate fp1, *fp = &fp1;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i=7; i>=0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* the FPU automatically computes it */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    /* frstor reads the image, so it is an input operand */
    asm volatile ("frstor %0" : : "m" (*fp));
}

static void save_native_fp_fsave(CPUState *env)
{
    int fptag, i, j;
    uint16_t fpuc;
    struct fpstate fp1, *fp = &fp1;

    /* fsave writes the image, so it is an output operand */
    asm volatile ("fsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag;
    for(i = 0;i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
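
/* fsave/frstor use the legacy tag word with two bits per register (11b
   meaning "empty"), while QEMU keeps one boolean per register in
   env->fptags[]; the register contents are copied in stack order starting
   at ST(0), i.e. physical register env->fpstt. */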

static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for(i = 0; i < 8; i++)
        fptag |= (env->fptags[i] << i);
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
    /* fxrstor reads the image, so it is an input operand */
    asm volatile ("fxrstor %0" : : "m" (*fp));
}

static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

    /* fxsave writes the image, so it is an output operand */
    asm volatile ("fxsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag ^ 0xff;
    for(i = 0;i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    asm volatile ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
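
/* fxsave uses the abridged tag word with one bit per register (1 = valid,
   0 = empty), the inverse of env->fptags[] where 1 means empty; hence the
   "^ 0xff" conversion in both directions. */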

static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    int selector;

    selector = (env->star >> 32) & 0xffff;
#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int code64;

        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~env->fmask;
        if (code64)
            env->eip = env->lstar;
        else
            env->eip = env->cstar;
    } else
#endif
    {
        env->regs[R_ECX] = (uint32_t)kenv->next_eip;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
        env->eip = (uint32_t)env->star;
    }
    return 2;
}
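
/* This mirrors the hardware SYSCALL behaviour: CS and SS are derived from
   STAR[47:32], the 64-bit entry point comes from LSTAR (or CSTAR in
   compatibility mode) with RFLAGS masked by FMASK, and the legacy entry
   point from STAR[31:0]. The return value 2 is the same "continue in the
   software emulator" convention used for KQEMU_RET_SOFTMMU below. */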

#ifdef CONFIG_PROFILER

#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;

static void kqemu_record_pc(unsigned long pc)
{
    unsigned long h;
    PCRecord **pr, *r;

    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);
    pr = &pc_rec_hash[h];
    for(;;) {
        r = *pr;
        if (r == NULL)
            break;
        if (r->pc == pc) {
            r->count++;
            return;
        }
        pr = &r->next;
    }
    r = malloc(sizeof(PCRecord));
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}

static int pc_rec_cmp(const void *p1, const void *p2)
{
    PCRecord *r1 = *(PCRecord **)p1;
    PCRecord *r2 = *(PCRecord **)p2;
    if (r1->count < r2->count)
        return 1;
    else if (r1->count == r2->count)
        return 0;
    else
        return -1;
}

static void kqemu_record_flush(void)
{
    PCRecord *r, *r_next;
    int h;

    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
            r_next = r->next;
            free(r);
        }
        pc_rec_hash[h] = NULL;
    }
    nb_pc_records = 0;
}

void kqemu_record_dump(void)
{
    PCRecord **pr, *r;
    int i, h;
    FILE *f;
    int64_t total, sum;

    pr = malloc(sizeof(PCRecord *) * nb_pc_records);
    i = 0;
    total = 0;
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
            pr[i++] = r;
            total += r->count;
        }
    }
    qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);

    f = fopen("/tmp/kqemu.stats", "w");
    if (!f) {
        perror("/tmp/kqemu.stats");
        exit(1);
    }
    fprintf(f, "total: %" PRId64 "\n", total);
    sum = 0;
    for(i = 0; i < nb_pc_records; i++) {
        r = pr[i];
        sum += r->count;
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
                r->pc,
                r->count,
                (double)r->count / (double)total * 100.0,
                (double)sum / (double)total * 100.0);
    }
    fclose(f);
    free(pr);

    kqemu_record_flush();
}
#endif
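
/* The dump sorts guest PCs by descending hit count with per-PC and
   cumulative percentages; the records are fed from the KQEMU_RET_SOFTMMU
   path in kqemu_cpu_exec(), so the output shows which guest code keeps
   falling back to the software MMU. */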

static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
                                  const SegmentCache *sc)
{
    ksc->selector = sc->selector;
    ksc->flags = sc->flags;
    ksc->limit = sc->limit;
    ksc->base = sc->base;
}

static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->selector = ksc->selector;
    sc->flags = ksc->flags;
    sc->limit = ksc->limit;
    sc->base = ksc->base;
}

int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, cpl, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu: cpu_exec: enter\n");
        cpu_dump_state(env, logfile, fprintf, 0);
    }
#endif
    for(i = 0; i < CPU_NB_REGS; i++)
        kenv->regs[i] = env->regs[i];
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for(i = 0; i < 6; i++)
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    if (env->dr[7] & 0xff) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    cpl = (env->hflags & HF_CPL_MASK);
    kenv->cpl = cpl;
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    kqemu_reset_modified_ram_pages();

    if (env->cpuid_features & CPUID_FXSR)
        restore_native_fp_fxrstor(env);
    else
        restore_native_fp_frstor(env);

#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif
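    /* At this point the kernel module has executed guest code natively
       and written the resulting CPU state, return code and counters back
       into *kenv; the code below copies that state back into env. */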
    if (env->cpuid_features & CPUID_FXSR)
        save_native_fp_fxsave(env);
    else
        save_native_fp_fsave(env);

    for(i = 0; i < CPU_NB_REGS; i++)
        env->regs[i] = kenv->regs[i];
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for(i = 0; i < 6; i++)
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for(i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    if (kenv->nb_modified_ram_pages > 0) {
        for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
            unsigned long addr;
            addr = modified_ram_pages[i];
            tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
        }
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that in real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK)
        env->hflags |= HF_OSFXSR_MASK;
    else
        env->hflags &= ~HF_OSFXSR_MASK;

#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    }
#endif
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    } else
    if ((ret & 0xff00) == KQEMU_RET_INT) {
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            fprintf(logfile, "kqemu: interrupt v=%02x:\n",
                    env->exception_index);
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 1;
    } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            fprintf(logfile, "kqemu: exception v=%02x e=%04x:\n",
                    env->exception_index, env->error_code);
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 1;
    } else if (ret == KQEMU_RET_INTR) {
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 0;
    } else if (ret == KQEMU_RET_SOFTMMU) {
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 2;
    } else {
        cpu_dump_state(env, stderr, fprintf, 0);
        fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
        exit(1);
    }
    return 0;
}
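
/* Return convention: 0 when execution was interrupted (KQEMU_RET_INTR,
   e.g. a pending host signal), 1 when the caller must deliver the
   interrupt or exception stored in exception_index, and 2 when execution
   must continue in the software emulator. */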

void kqemu_cpu_interrupt(CPUState *env)
{
#if defined(_WIN32)
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and to return successfully. */
    CancelIo(kqemu_fd);
#endif
}
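
/* On other hosts no explicit cancellation is done here; the kernel module
   is expected to come back from KQEMU_EXEC with KQEMU_RET_INTR (handled
   above) when a host signal is pending. */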

/*
   QEMU paravirtualization interface. The current interface only
   allows modifying the IF and IOPL flags when running in
   kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/

#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000

static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    return 0;
}

static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
}

static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
}

static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *env;

    env = cpu_single_env;
    if (!env)
        return 0;
    return env->eflags & (IF_MASK | IOPL_MASK);
}

/* Note: after writing to this address, the guest code must make sure
   it is exiting the current TB. pushf/popf can be used for that
   purpose. */
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    CPUState *env;

    env = cpu_single_env;
    if (!env)
        return;
    env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
        (val & (IF_MASK | IOPL_MASK));
}
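
/* Guest-side usage (an illustrative sketch only, not part of the
   interface shipped here): a paravirtualized guest that maps the
   communication page can read and set IF/IOPL with plain 32-bit accesses,
   which the handlers above serve. 'qpi_page' stands for a hypothetical
   guest virtual mapping of QPI_COMM_PAGE_PHYS_ADDR. */
#if 0
static inline uint32_t guest_qpi_get_flags(volatile uint32_t *qpi_page)
{
    return *qpi_page;                 /* served by qpi_mem_readl() */
}

static inline void guest_qpi_set_flags(volatile uint32_t *qpi_page,
                                       uint32_t if_iopl)
{
    *qpi_page = if_iopl;              /* served by qpi_mem_writel() */
    /* per the note above, the guest must then leave the current TB,
       e.g. with a pushf/popf pair */
}
#endif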

static CPUReadMemoryFunc *qpi_mem_read[3] = {
    qpi_mem_readb,
    qpi_mem_readw,
    qpi_mem_readl,
};

static CPUWriteMemoryFunc *qpi_mem_write[3] = {
    qpi_mem_writeb,
    qpi_mem_writew,
    qpi_mem_writel,
};

static void qpi_init(void)
{
    kqemu_comm_base = QPI_COMM_PAGE_PHYS_ADDR | 1;
    qpi_io_memory = cpu_register_io_memory(0,
                                           qpi_mem_read,
                                           qpi_mem_write, NULL);
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
                                 0x1000, qpi_io_memory);
}
#endif