Statistics
| Branch: | Revision:

root / kqemu.c @ fad6cb1a

History | View | Annotate | Download (28.4 kB)

1
/*
2
 *  KQEMU support
3
 *
4
 *  Copyright (c) 2005-2008 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
19
 */
20
#include "config.h"
21
#ifdef _WIN32
22
#define WIN32_LEAN_AND_MEAN
23
#include <windows.h>
24
#include <winioctl.h>
25
#else
26
#include <sys/types.h>
27
#include <sys/mman.h>
28
#include <sys/ioctl.h>
29
#endif
30
#ifdef HOST_SOLARIS
31
#include <sys/ioccom.h>
32
#endif
33
#include <stdlib.h>
34
#include <stdio.h>
35
#include <stdarg.h>
36
#include <string.h>
37
#include <errno.h>
38
#include <unistd.h>
39
#include <inttypes.h>
40

    
41
#include "cpu.h"
42
#include "exec-all.h"
43
#include "qemu-common.h"
44

    
45
#ifdef USE_KQEMU
46

    
47
#define DEBUG
48
//#define PROFILE
49

    
50
#include <unistd.h>
51
#include <fcntl.h>
52
#include "kqemu.h"
53

    
54
#ifdef _WIN32
55
#define KQEMU_DEVICE "\\\\.\\kqemu"
56
#else
57
#define KQEMU_DEVICE "/dev/kqemu"
58
#endif
59

    
60
static void qpi_init(void);
61

    
62
/* Handle on the kqemu device. It holds KQEMU_INVALID_FD until kqemu_init()
   opens the device, and again after a failed initialization. The handle
   type and the primitive used to close it depend on the host OS. */
#ifdef _WIN32
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
#define kqemu_closefd(x) CloseHandle(x)
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#else
#define KQEMU_INVALID_FD -1
#define kqemu_closefd(x) close(x)
int kqemu_fd = KQEMU_INVALID_FD;
#endif
71

    
72
/* Acceleration policy:
   0 = not allowed
   1 = user kqemu
   2 = kernel kqemu
*/
int kqemu_allowed = 1;

/* Virtual addresses queued by kqemu_flush_page(); the counter is set to
   KQEMU_FLUSH_ALL when a complete flush is requested instead. */
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;

/* RAM addresses queued by kqemu_set_notdirty(). */
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;

/* RAM addresses queued by kqemu_modify_page(). */
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;

/* One byte per RAM page: non-zero while the page is queued in
   modified_ram_pages. */
uint8_t *modified_ram_pages_table;

/* I/O memory index obtained from cpu_register_io_memory() in qpi_init(). */
int qpi_io_memory;

uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
86

    
87
/* Execute the CPUID instruction with 'index' in EAX and store the four
   result registers into the lvalues eax, ebx, ecx and edx. */
#define cpuid(index, eax, ebx, ecx, edx)                        \
    __asm__ __volatile__ ("cpuid"                               \
                          : "=a" (eax), "=b" (ebx),             \
                            "=c" (ecx), "=d" (edx)              \
                          : "0" (index))
91

    
92
/* Tell whether the host CPU implements the CPUID instruction.
   Returns non-zero if it does. */
#ifdef __x86_64__
static int is_cpuid_supported(void)
{
    /* no runtime probe is performed on x86_64 hosts */
    return 1;
}
#else
static int is_cpuid_supported(void)
{
    int toggled, original;

    /* Read EFLAGS, keep a copy, flip bit 21 (0x00200000), write the
       result back and read EFLAGS again. The two values differ only if
       the flip was accepted by the CPU. */
    __asm__ __volatile__ ("pushf\n"
                          "popl %0\n"
                          "movl %0, %1\n"
                          "xorl $0x00200000, %0\n"
                          "pushl %0\n"
                          "popf\n"
                          "pushf\n"
                          "popl %0\n"
                          : "=a" (toggled), "=d" (original)
                          :
                          : "cc");
    return toggled != original;
}
#endif
115

    
116
/* Copy the host values of a fixed set of CPUID feature bits into the
   target CPU state, leaving every other target feature bit untouched.
   When the host has no CPUID instruction the selected bits are cleared. */
static void kqemu_update_cpuid(CPUState *env)
{
    /* the following features are kept identical on the host and
       target cpus because they are important for user code. Strictly
       speaking, only SSE really matters because the OS must support
       it if the user code uses it. */
    const int critical_features_mask =
        CPUID_CMOV | CPUID_CX8 |
        CPUID_FXSR | CPUID_MMX | CPUID_SSE |
        CPUID_SSE2 | CPUID_SEP;
    const int ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
    int features = 0;
    int ext_features = 0;
    uint32_t eax, ebx, ecx, edx;

    if (is_cpuid_supported()) {
        cpuid(1, eax, ebx, ecx, edx);
        features = edx;
        ext_features = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
       compatibility mode, so in order to have the best performances
       it is better not to use it */
    features &= ~CPUID_SEP;
#endif
    env->cpuid_features &= ~critical_features_mask;
    env->cpuid_features |= features & critical_features_mask;

    env->cpuid_ext_features &= ~ext_features_mask;
    env->cpuid_ext_features |= ext_features & ext_features_mask;
    /* XXX: we could update more of the target CPUID state so that the
       non accelerated code sees exactly the same CPU features as the
       accelerated code */
}
152

    
153
/* Open the kqemu device, check the module version, allocate the page
   lists shared with the module and send it the KQEMU_INIT request.
   Returns 0 on success. Returns -1 when kqemu is not allowed or when any
   step fails; in the failure case the device handle is closed and reset
   to KQEMU_INVALID_FD.
   NOTE(review): buffers allocated before a failure are not released. */
int kqemu_init(CPUState *env)
{
    struct kqemu_init kinit;
    int ret, version;
#ifdef _WIN32
    DWORD temp;
#endif

    if (!kqemu_allowed) {
        return -1;
    }

#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
                KQEMU_DEVICE, GetLastError());
        return -1;
    }
#else
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
        return -1;
    }
#endif

    /* 'version' stays 0 if the request fails, which is then reported as a
       mismatch below */
    version = 0;
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
#endif
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
                                  sizeof(uint64_t));
    if (!pages_to_flush) {
        goto fail;
    }

    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
                                       sizeof(uint64_t));
    if (!ram_pages_to_update) {
        goto fail;
    }

    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
                                      sizeof(uint64_t));
    if (!modified_ram_pages) {
        goto fail;
    }

    /* one byte per target page of RAM */
    modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
    if (!modified_ram_pages_table) {
        goto fail;
    }

    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    kinit.ram_base = phys_ram_base;
    kinit.ram_size = phys_ram_size;
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
#endif
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
        goto fail;
    }

    kqemu_update_cpuid(env);
    env->kqemu_enabled = kqemu_allowed;
    nb_pages_to_flush = 0;
    nb_ram_pages_to_update = 0;

    qpi_init();
    return 0;

 fail:
    kqemu_closefd(kqemu_fd);
    kqemu_fd = KQEMU_INVALID_FD;
    return -1;
}
241

    
242
/* Queue 'addr' in pages_to_flush. Once the list holds
   KQEMU_MAX_PAGES_TO_FLUSH entries the counter is set to KQEMU_FLUSH_ALL
   instead. 'env' is only present for the interface; it is not used. */
void kqemu_flush_page(CPUState *env, target_ulong addr)
{
#if defined(DEBUG)
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    }
#endif
    if (nb_pages_to_flush < KQEMU_MAX_PAGES_TO_FLUSH) {
        pages_to_flush[nb_pages_to_flush] = addr;
        nb_pages_to_flush++;
        return;
    }
    /* no room left: ask for everything to be flushed */
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
254

    
255
/* Request a complete flush by setting the flush counter to
   KQEMU_FLUSH_ALL. Neither 'env' nor 'global' is used. */
void kqemu_flush(CPUState *env, int global)
{
#if defined(DEBUG)
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_flush:\n");
    }
#endif
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
264

    
265
/* Queue 'ram_addr' in ram_pages_to_update when the dirty byte of its page
   is 0xff. Once the list holds KQEMU_MAX_RAM_PAGES_TO_UPDATE entries the
   counter is set to KQEMU_RAM_PAGES_UPDATE_ALL instead. */
void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
#if defined(DEBUG)
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu_set_notdirty: addr=%08lx\n",
                (unsigned long)ram_addr);
    }
#endif
    /* we only track transitions to dirty state */
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] == 0xff) {
        if (nb_ram_pages_to_update < KQEMU_MAX_RAM_PAGES_TO_UPDATE) {
            ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
        } else {
            nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
        }
    }
}
281

    
282
static void kqemu_reset_modified_ram_pages(void)
283
{
284
    int i;
285
    unsigned long page_index;
286

    
287
    for(i = 0; i < nb_modified_ram_pages; i++) {
288
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
289
        modified_ram_pages_table[page_index] = 0;
290
    }
291
    nb_modified_ram_pages = 0;
292
}
293

    
294
/* Queue 'ram_addr' in modified_ram_pages unless its page is already
   queued. When the list reaches KQEMU_MAX_MODIFIED_RAM_PAGES entries it is
   sent to the module with KQEMU_MODIFY_RAM_PAGES and then emptied. The
   result of that request is not examined. */
void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index = ram_addr >> TARGET_PAGE_BITS;
    int ret;
#ifdef _WIN32
    DWORD temp;
#endif

    if (modified_ram_pages_table[page_index]) {
        /* already in the list */
        return;
    }

    modified_ram_pages_table[page_index] = 1;
    modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
    if (nb_modified_ram_pages < KQEMU_MAX_MODIFIED_RAM_PAGES) {
        return;
    }

    /* flush */
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                          &nb_modified_ram_pages,
                          sizeof(nb_modified_ram_pages),
                          NULL, 0, &temp, NULL);
#else
    ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                &nb_modified_ram_pages);
#endif
    kqemu_reset_modified_ram_pages();
}
324

    
325
/* Describe a physical memory range to the kqemu module.
   The range [start_addr, start_addr + size) is widened to whole target
   pages. The low bits of 'phys_offset' select the memory type (RAM, ROM,
   the QPI communication page, or unassigned for anything else) and its
   page-aligned part is passed as the RAM address. A failed request is
   reported on stderr; nothing is returned to the caller. */
void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
    uint64_t end;
    int ret, io_index;

    /* set the paddings to zero, as kqemu_init() does for its request */
    memset(kphys_mem, 0, sizeof(*kphys_mem));

    /* round the end up and the start down to a page boundary */
    end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;
    kphys_mem->phys_addr = start_addr;
    kphys_mem->size = end - start_addr;
    kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
    io_index = phys_offset & ~TARGET_PAGE_MASK;
    switch(io_index) {
    case IO_MEM_RAM:
        kphys_mem->io_index = KQEMU_IO_MEM_RAM;
        break;
    case IO_MEM_ROM:
        kphys_mem->io_index = KQEMU_IO_MEM_ROM;
        break;
    default:
        if (qpi_io_memory == io_index) {
            kphys_mem->io_index = KQEMU_IO_MEM_COMM;
        } else {
            kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
        }
        break;
    }
#ifdef _WIN32
    {
        DWORD temp;
        ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
                              kphys_mem, sizeof(*kphys_mem),
                              NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    }
#else
    ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
#endif
    if (ret < 0) {
        /* name the request that was actually issued */
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_MEM error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                ret, start_addr,
                (unsigned long)size, (unsigned long)phys_offset);
    }
}
369

    
370
/* Memory image passed to the fsave and frstor instructions by
   save_native_fp_fsave() and restore_native_fp_frstor(). The field
   order and sizes define the layout and must not change. */
struct fpstate {
    uint16_t fpuc;              /* control word */
    uint16_t dummy1;
    uint16_t fpus;              /* status word, stack top in bits 11-13 */
    uint16_t dummy2;
    uint16_t fptag;             /* two tag bits per register */
    uint16_t dummy3;

    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];    /* eight registers of 10 bytes each */
};
384

    
385
/* Memory image passed to the fxsave and fxrstor instructions by
   save_native_fp_fxsave() and restore_native_fp_fxrstor(). The field
   order and sizes define the layout and must not change. */
struct fpxstate {
    uint16_t fpuc;                  /* control word */
    uint16_t fpus;                  /* status word, stack top in bits 11-13 */
    uint16_t fptag;                 /* one tag bit per register */
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];        /* eight 16-byte slots, 10 bytes used */
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];
};

/* single 16-byte aligned buffer shared by the fxsave/fxrstor helpers */
static struct fpxstate fpx1 __attribute__((aligned(16)));
404

    
405
/* Load the host FPU from the emulated FPU state in 'env' using frstor.
   The registers are written to the image in stack order, starting at
   env->fpstt. */
static void restore_native_fp_frstor(CPUState *env)
{
    struct fpstate fp1, *fp = &fp1;
    int fptag, i, j;

    /* clear the fields that are not filled in below so that no
       indeterminate stack contents are handed to frstor */
    memset(fp, 0, sizeof(*fp));

    fp->fpuc = env->fpuc;
    /* merge the stack top into bits 11-13 of the status word */
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* the FPU automatically computes it */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    /* frstor only reads the image: it must be an input operand, otherwise
       the compiler may treat the stores above as dead */
    __asm__ __volatile__ ("frstor %0" : : "m" (*fp));
}
429

    
430
/* Store the host FPU state into 'env' using fsave, then reload a control
   word of 0x037f combined with bits 10-11 of the saved control word. */
static void save_native_fp_fsave(CPUState *env)
{
    struct fpstate fp1, *fp = &fp1;
    int fptag, i, j;
    uint16_t fpuc;

    /* fsave writes the image: it must be an output operand so that the
       compiler knows the buffer is modified */
    __asm__ __volatile__ ("fsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag;
    for (i = 0; i < 8; i++) {
        /* a two-bit tag of 3 is stored as 1, anything else as 0 */
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    __asm__ __volatile__ ("fldcw %0" : : "m" (fpuc));
}
454

    
455
/* Load the host FPU (and the SSE state when the target has CPUID_SSE)
   from 'env' using fxrstor on the shared fpx1 buffer. */
static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    /* merge the stack top into bits 11-13 of the status word */
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }
    /* the image holds the inverse of env->fptags, one bit per register */
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
    /* fxrstor only reads the image: it must be an input operand, otherwise
       the compiler may treat the stores above as dead */
    __asm__ __volatile__ ("fxrstor %0" : : "m" (*fp));
}
480

    
481
/* Store the host FPU state (and the SSE state when the target has
   CPUID_SSE) into 'env' using fxsave on the shared fpx1 buffer, then
   reinitialize the host FPU and reload a control word of 0x037f combined
   with bits 10-11 of the saved control word. */
static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

    /* fxsave writes the image: it must be an output operand so that the
       compiler knows the buffer is modified */
    __asm__ __volatile__ ("fxsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    /* env->fptags holds the inverse of the image tag bits */
    fptag = fp->fptag ^ 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    __asm__ __volatile__ ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    __asm__ __volatile__ ("fldcw %0" : : "m" (fpuc));
}
510

    
511
/* Perform in 'env' the state changes of a SYSCALL instruction that the
   module reported with KQEMU_RET_SYSCALL: save the return address, switch
   to CPL 0, load flat CS/SS descriptors derived from bits 32-47 of
   env->star, mask EFLAGS and set the new EIP. Always returns 2. */
static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    const int selector = (env->star >> 32) & 0xffff;
    const int cs_selector = selector & 0xfffc;
    const int ss_selector = (selector + 8) & 0xfffc;
    /* the stack segment descriptor is the same in every mode */
    const unsigned int ss_flags =
        DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
        DESC_S_MASK |
        DESC_W_MASK | DESC_A_MASK;

#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int code64;

        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        /* sampled before the segment loads below */
        code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, cs_selector,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, ss_selector,
                               0, 0xffffffff,
                               ss_flags);
        env->eflags &= ~env->fmask;
        env->eip = code64 ? env->lstar : env->cstar;
    } else
#endif
    {
        env->regs[R_ECX] = (uint32_t)kenv->next_eip;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, cs_selector,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
        cpu_x86_load_seg_cache(env, R_SS, ss_selector,
                               0, 0xffffffff,
                               ss_flags);
        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
        env->eip = (uint32_t)env->star;
    }
    return 2;
}
563

    
564
#ifdef CONFIG_PROFILER
565

    
566
#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* One profiled program counter value and the number of times it was
   recorded; records that share a hash bucket are chained through 'next'. */
typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;

/* Count one occurrence of 'pc': increment its record if it already
   exists, otherwise append a new record with a count of 1 to its hash
   bucket. If the record cannot be allocated the sample is dropped. */
static void kqemu_record_pc(unsigned long pc)
{
    unsigned long h;
    PCRecord **pr, *r;

    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);

    /* walk the bucket; 'pr' ends up on the link to append to */
    for (pr = &pc_rec_hash[h]; (r = *pr) != NULL; pr = &r->next) {
        if (r->pc == pc) {
            r->count++;
            return;
        }
    }

    r = malloc(sizeof(*r));
    if (r == NULL) {
        /* profiling is best effort: do not crash on memory exhaustion */
        return;
    }
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}
605

    
606
static int pc_rec_cmp(const void *p1, const void *p2)
607
{
608
    PCRecord *r1 = *(PCRecord **)p1;
609
    PCRecord *r2 = *(PCRecord **)p2;
610
    if (r1->count < r2->count)
611
        return 1;
612
    else if (r1->count == r2->count)
613
        return 0;
614
    else
615
        return -1;
616
}
617

    
618
static void kqemu_record_flush(void)
619
{
620
    PCRecord *r, *r_next;
621
    int h;
622

    
623
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
624
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
625
            r_next = r->next;
626
            free(r);
627
        }
628
        pc_rec_hash[h] = NULL;
629
    }
630
    nb_pc_records = 0;
631
}
632

    
633
void kqemu_record_dump(void)
634
{
635
    PCRecord **pr, *r;
636
    int i, h;
637
    FILE *f;
638
    int64_t total, sum;
639

    
640
    pr = malloc(sizeof(PCRecord *) * nb_pc_records);
641
    i = 0;
642
    total = 0;
643
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
644
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
645
            pr[i++] = r;
646
            total += r->count;
647
        }
648
    }
649
    qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
650

    
651
    f = fopen("/tmp/kqemu.stats", "w");
652
    if (!f) {
653
        perror("/tmp/kqemu.stats");
654
        exit(1);
655
    }
656
    fprintf(f, "total: %" PRId64 "\n", total);
657
    sum = 0;
658
    for(i = 0; i < nb_pc_records; i++) {
659
        r = pr[i];
660
        sum += r->count;
661
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
662
                r->pc,
663
                r->count,
664
                (double)r->count / (double)total * 100.0,
665
                (double)sum / (double)total * 100.0);
666
    }
667
    fclose(f);
668
    free(pr);
669

    
670
    kqemu_record_flush();
671
}
672
#endif
673

    
674
static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
675
                                  const SegmentCache *sc)
676
{
677
    ksc->selector = sc->selector;
678
    ksc->flags = sc->flags;
679
    ksc->limit = sc->limit;
680
    ksc->base = sc->base;
681
}
682

    
683
/* Copy the four fields of a kqemu segment cache entry 'ksc' back into the
   QEMU representation 'sc'. */
static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->base = ksc->base;
    sc->limit = ksc->limit;
    sc->flags = ksc->flags;
    sc->selector = ksc->selector;
}
691

    
692
/* Run the guest through the kqemu module once.
   The CPU state in 'env' is copied to a kqemu_cpu_state, the host FPU is
   loaded with the guest FPU state, the KQEMU_EXEC request is issued, and
   the resulting state is copied back. The pending page lists are then
   processed and the hidden flags recomputed.
   Returns:
     0 when the module returned KQEMU_RET_INTR,
     1 when it returned an interrupt or an exception (described in
       env->exception_index and related fields),
     2 when it returned KQEMU_RET_SOFTMMU, or after a SYSCALL handled by
       do_syscall().
   Any other return value from the module terminates the process. */
int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu: cpu_exec: enter\n");
        cpu_dump_state(env, logfile, fprintf, 0);
    }
#endif

    /* copy the CPU state to the structure handed to the module */
    for (i = 0; i < CPU_NB_REGS; i++) {
        kenv->regs[i] = env->regs[i];
    }
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for (i = 0; i < 6; i++) {
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    }
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    /* dr0-dr3 are only passed when the low byte of dr7 is non-zero */
    if ((env->dr[7] & 0xff) != 0) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    kenv->cpl = (env->hflags & HF_CPL_MASK);
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    /* the count was handed over above; the list contents are read again
       after the module returns */
    kqemu_reset_modified_ram_pages();

    if (env->cpuid_features & CPUID_FXSR) {
        restore_native_fp_fxrstor(env);
    } else {
        restore_native_fp_frstor(env);
    }

#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif

    if (env->cpuid_features & CPUID_FXSR) {
        save_native_fp_fxsave(env);
    } else {
        save_native_fp_fsave(env);
    }

    /* copy the resulting CPU state back */
    for (i = 0; i < CPU_NB_REGS; i++) {
        env->regs[i] = kenv->regs[i];
    }
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for (i = 0; i < 6; i++) {
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    }
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for (i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    /* invalidate the translated code of every page reported as modified */
    for (i = 0; i < kenv->nb_modified_ram_pages; i++) {
        unsigned long addr = modified_ram_pages[i];
        tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                (env->eflags & VM_MASK) ||
                !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that in real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        env->hflags |= HF_OSFXSR_MASK;
    } else {
        env->hflags &= ~HF_OSFXSR_MASK;
    }

#ifdef DEBUG
    if (loglevel & CPU_LOG_INT) {
        fprintf(logfile, "kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    }
#endif

    /* dispatch on the module return value; the tests are made in this
       exact order */
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    }

    if ((ret & 0xff00) == KQEMU_RET_INT) {
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            fprintf(logfile, "kqemu: interrupt v=%02x:\n",
                    env->exception_index);
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 1;
    }

    if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            fprintf(logfile, "kqemu: exception v=%02x e=%04x:\n",
                    env->exception_index, env->error_code);
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 1;
    }

    if (ret == KQEMU_RET_INTR) {
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 0;
    }

    if (ret == KQEMU_RET_SOFTMMU) {
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
#ifdef DEBUG
        if (loglevel & CPU_LOG_INT) {
            cpu_dump_state(env, logfile, fprintf, 0);
        }
#endif
        return 2;
    }

    cpu_dump_state(env, stderr, fprintf, 0);
    fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
    exit(1);
    return 0;
}
941

    
942
/* Interrupt a running KQEMU_EXEC request. Only Windows hosts do anything
   here; the function is empty elsewhere. 'env' is not used. */
void kqemu_cpu_interrupt(CPUState *env)
{
#ifdef _WIN32
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and successfully returning. */
    CancelIo(kqemu_fd);
#endif
}
950

    
951
/*
   QEMU paravirtualization interface. The current interface only
   allows the guest to modify the IF and IOPL flags when running in
   kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/
959

    
960
#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
961

    
962
/* Byte reads of the QPI page are not implemented: always returns 0. */
static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    (void)opaque;
    (void)addr;
    return 0;
}
966

    
967
/* Word reads of the QPI page are not implemented: always returns 0. */
static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    (void)opaque;
    (void)addr;
    return 0;
}
971

    
972
/* Byte writes to the QPI page are ignored. */
static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}
975

    
976
/* Word writes to the QPI page are ignored. */
static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}
979

    
980
/* Long reads of the QPI page return the IF and IOPL bits of the EFLAGS of
   cpu_single_env, or 0 when cpu_single_env is not set. 'opaque' and
   'addr' are not used. */
static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *env = cpu_single_env;

    return env ? (env->eflags & (IF_MASK | IOPL_MASK)) : 0;
}
989

    
990
/* Note: after writing to this address, the guest code must make sure
991
   it is exiting the current TB. pushf/popf can be used for that
992
   purpose. */
993
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
994
{
995
    CPUState *env;
996

    
997
    env = cpu_single_env;
998
    if (!env)
999
        return;
1000
    env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) | 
1001
        (val & (IF_MASK | IOPL_MASK));
1002
}
1003

    
1004
static CPUReadMemoryFunc *qpi_mem_read[3] = {
1005
    qpi_mem_readb,
1006
    qpi_mem_readw,
1007
    qpi_mem_readl,
1008
};
1009

    
1010
static CPUWriteMemoryFunc *qpi_mem_write[3] = {
1011
    qpi_mem_writeb,
1012
    qpi_mem_writew,
1013
    qpi_mem_writel,
1014
};
1015

    
1016
static void qpi_init(void)
1017
{
1018
    kqemu_comm_base = 0xff000000 | 1;
1019
    qpi_io_memory = cpu_register_io_memory(0, 
1020
                                           qpi_mem_read, 
1021
                                           qpi_mem_write, NULL);
1022
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff, 
1023
                                 0x1000, qpi_io_memory);
1024
}
1025
#endif