Statistics
| Branch: | Revision:

root / kqemu.c @ dfe5fff3

History | View | Annotate | Download (27.8 kB)

1
/*
2
 *  KQEMU support
3
 *
4
 *  Copyright (c) 2005-2008 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include "config.h"
20
#ifdef _WIN32
21
#include <windows.h>
22
#include <winioctl.h>
23
#else
24
#include <sys/types.h>
25
#include <sys/mman.h>
26
#include <sys/ioctl.h>
27
#endif
28
#ifdef CONFIG_SOLARIS
29
#include <sys/ioccom.h>
30
#endif
31
#include <stdlib.h>
32
#include <stdio.h>
33
#include <stdarg.h>
34
#include <string.h>
35
#include <errno.h>
36
#include <unistd.h>
37
#include <inttypes.h>
38

    
39
#include "cpu.h"
40
#include "exec-all.h"
41
#include "qemu-common.h"
42

    
43
#ifdef CONFIG_KQEMU
44

    
45
#define DEBUG
46
//#define PROFILE
47

    
48

    
49
#ifdef DEBUG
50
#  define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
51
#  define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
52
#else
53
#  define LOG_INT(...) do { } while (0)
54
#  define LOG_INT_STATE(env) do { } while (0)
55
#endif
56

    
57
#include <unistd.h>
58
#include <fcntl.h>
59
#include "kqemu.h"
60

    
61
#ifdef _WIN32
62
#define KQEMU_DEVICE "\\\\.\\kqemu"
63
#else
64
#define KQEMU_DEVICE "/dev/kqemu"
65
#endif
66

    
67
static void qpi_init(void);
68

    
69
#ifdef _WIN32
70
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
71
HANDLE kqemu_fd = KQEMU_INVALID_FD;
72
#define kqemu_closefd(x) CloseHandle(x)
73
#else
74
#define KQEMU_INVALID_FD -1
75
int kqemu_fd = KQEMU_INVALID_FD;
76
#define kqemu_closefd(x) close(x)
77
#endif
78

    
79
/* 0 = not allowed
80
   1 = user kqemu
81
   2 = kernel kqemu
82
*/
83
int kqemu_allowed = 0;
84
uint64_t *pages_to_flush;
85
unsigned int nb_pages_to_flush;
86
uint64_t *ram_pages_to_update;
87
unsigned int nb_ram_pages_to_update;
88
uint64_t *modified_ram_pages;
89
unsigned int nb_modified_ram_pages;
90
uint8_t *modified_ram_pages_table;
91
int qpi_io_memory;
92
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
93
ram_addr_t kqemu_phys_ram_size;
94
uint8_t *kqemu_phys_ram_base;
95

    
96
/* Execute the CPUID instruction for the given leaf and store the four
   result registers in the lvalues a, b, c and d. */
#define cpuid(leaf, a, b, c, d) \
  __asm__ __volatile__ ("cpuid" \
                        : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
                        : "0" (leaf))
100

    
101
/*
 * Tell whether the host CPU implements the CPUID instruction.
 *
 * x86_64 builds return 1 unconditionally. Other builds try to toggle bit
 * 21 (mask 0x00200000) of EFLAGS and report whether the value read back
 * differs from the value read before the toggle.
 */
static int is_cpuid_supported(void)
{
#ifdef __x86_64__
    return 1;
#else
    int after, before;

    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (after), "=d" (before)
                  :
                  : "cc");
    return (after != before);
#endif
}
124

    
125
/*
 * Force a subset of the target CPUID feature bits to match the host CPU.
 *
 * Bits selected by the two masks below are copied from the host (CPUID
 * leaf 1, EDX and ECX); every other bit of env->cpuid_features and
 * env->cpuid_ext_features is left untouched. When the host has no CPUID
 * instruction the selected bits are all cleared.
 */
static void kqemu_update_cpuid(CPUState *env)
{
    /* the following features are kept identical on the host and
       target cpus because they are important for user code. Strictly
       speaking, only SSE really matters because the OS must support
       it if the user code uses it. */
    const int critical_features_mask =
        CPUID_CMOV | CPUID_CX8 |
        CPUID_FXSR | CPUID_MMX | CPUID_SSE |
        CPUID_SSE2 | CPUID_SEP;
    const int ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
    int host_features = 0;
    int host_ext_features = 0;
    uint32_t eax, ebx, ecx, edx;

    if (is_cpuid_supported()) {
        cpuid(1, eax, ebx, ecx, edx);
        host_features = edx;
        host_ext_features = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
       compatibility mode, so in order to have the best performances
       it is better not to use it */
    host_features &= ~CPUID_SEP;
#endif
    env->cpuid_features &= ~critical_features_mask;
    env->cpuid_features |= host_features & critical_features_mask;

    env->cpuid_ext_features &= ~ext_features_mask;
    env->cpuid_ext_features |= host_ext_features & ext_features_mask;
    /* XXX: we could update more of the target CPUID state so that the
       non accelerated code sees exactly the same CPU features as the
       accelerated code */
}
161

    
162
/*
 * Open the kqemu device, check the module version, allocate the page
 * queues and register them with the kernel module through KQEMU_INIT.
 *
 * Returns 0 on success and -1 on any failure (kqemu not allowed, device
 * cannot be opened, version mismatch, allocation failure, KQEMU_INIT
 * failure). After a failure that happens once the device is open, the
 * device is closed and kqemu_fd is reset to KQEMU_INVALID_FD.
 *
 * NOTE(review): buffers that were already allocated are not released on
 * the failure path.
 */
int kqemu_init(CPUState *env)
{
    struct kqemu_init kinit;
    int ret, version;
#ifdef _WIN32
    DWORD temp;
#endif

    if (!kqemu_allowed) {
        return -1;
    }

#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
                KQEMU_DEVICE, GetLastError());
        return -1;
    }
#else
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
        return -1;
    }
#endif

    /* version keeps the value 0 when the query itself fails */
    version = 0;
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
#endif
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
                                  sizeof(uint64_t));
    if (!pages_to_flush) {
        goto fail;
    }

    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
                                       sizeof(uint64_t));
    if (!ram_pages_to_update) {
        goto fail;
    }

    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
                                      sizeof(uint64_t));
    if (!modified_ram_pages) {
        goto fail;
    }

    /* one byte per RAM page */
    modified_ram_pages_table =
        qemu_mallocz(kqemu_phys_ram_size >> TARGET_PAGE_BITS);
    if (!modified_ram_pages_table) {
        goto fail;
    }

    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    kinit.ram_base = kqemu_phys_ram_base;
    kinit.ram_size = kqemu_phys_ram_size;
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
#endif
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
        goto fail;
    }

    kqemu_update_cpuid(env);
    env->kqemu_enabled = kqemu_allowed;
    nb_pages_to_flush = 0;
    nb_ram_pages_to_update = 0;

    qpi_init();
    return 0;

fail:
    kqemu_closefd(kqemu_fd);
    kqemu_fd = KQEMU_INVALID_FD;
    return -1;
}
251

    
252
/*
 * Queue one page address for flushing. When the queue already holds
 * KQEMU_MAX_PAGES_TO_FLUSH entries (or is already marked "flush all"),
 * the counter is set to KQEMU_FLUSH_ALL instead. env is not used.
 */
void kqemu_flush_page(CPUState *env, target_ulong addr)
{
    LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    if (nb_pages_to_flush < KQEMU_MAX_PAGES_TO_FLUSH) {
        pages_to_flush[nb_pages_to_flush++] = addr;
        return;
    }
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
260

    
261
/*
 * Request a complete flush by marking the page queue with
 * KQEMU_FLUSH_ALL. Neither env nor global is consulted.
 */
void kqemu_flush(CPUState *env, int global)
{
    LOG_INT("kqemu_flush:\n");

    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
266

    
267
/*
 * Record that the page containing ram_addr is leaving the fully dirty
 * state. Pages whose dirty byte is not 0xff are ignored. When the queue
 * is full the counter is set to KQEMU_RAM_PAGES_UPDATE_ALL instead.
 * env is not used.
 */
void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
    LOG_INT("kqemu_set_notdirty: addr=%08lx\n", 
                (unsigned long)ram_addr);
    /* we only track transitions to dirty state */
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff) {
        return;
    }
    if (nb_ram_pages_to_update < KQEMU_MAX_RAM_PAGES_TO_UPDATE) {
        ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
    } else {
        nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
    }
}
279

    
280
static void kqemu_reset_modified_ram_pages(void)
281
{
282
    int i;
283
    unsigned long page_index;
284

    
285
    for(i = 0; i < nb_modified_ram_pages; i++) {
286
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
287
        modified_ram_pages_table[page_index] = 0;
288
    }
289
    nb_modified_ram_pages = 0;
290
}
291

    
292
/*
 * Record that the RAM page containing ram_addr has been modified.
 *
 * A page that is already queued (non-zero table byte) is ignored.
 * Otherwise it is marked and appended to modified_ram_pages. Once the
 * queue reaches KQEMU_MAX_MODIFIED_RAM_PAGES entries it is handed to the
 * kernel module with KQEMU_MODIFY_RAM_PAGES and then emptied. A failing
 * request is reported on stderr; the queue is emptied regardless, as
 * before. env is not used.
 */
void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index;
    int ret;
#ifdef _WIN32
    DWORD temp;
#endif

    page_index = ram_addr >> TARGET_PAGE_BITS;
    if (modified_ram_pages_table[page_index]) {
        return;
    }

    modified_ram_pages_table[page_index] = 1;
    modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
    if (nb_modified_ram_pages < KQEMU_MAX_MODIFIED_RAM_PAGES) {
        return;
    }

    /* flush */
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                          &nb_modified_ram_pages,
                          sizeof(nb_modified_ram_pages),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                &nb_modified_ram_pages);
#endif
    if (ret < 0) {
        fprintf(stderr, "kqemu: KQEMU_MODIFY_RAM_PAGES error=%d: nb_modified_ram_pages=%u\n",
                ret, nb_modified_ram_pages);
    }
    kqemu_reset_modified_ram_pages();
}
322

    
323
/*
 * Tell the kernel module how a range of guest physical memory is backed.
 *
 * The range [start_addr, start_addr + size) is widened to whole target
 * pages. The page-aligned part of phys_offset is passed as the RAM
 * address; its low bits select the memory type: RAM, ROM, the QPI
 * communication page (when they equal qpi_io_memory) or unassigned.
 * A failing request is only reported on stderr.
 */
void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size, 
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
    uint64_t end;
    int ret, io_index;

    /* round the end up and the start down to page boundaries */
    end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;
    kphys_mem->phys_addr = start_addr;
    kphys_mem->size = end - start_addr;
    kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
    io_index = phys_offset & ~TARGET_PAGE_MASK;
    switch(io_index) {
    case IO_MEM_RAM:
        kphys_mem->io_index = KQEMU_IO_MEM_RAM;
        break;
    case IO_MEM_ROM:
        kphys_mem->io_index = KQEMU_IO_MEM_ROM;
        break;
    default:
        if (qpi_io_memory == io_index) {
            kphys_mem->io_index = KQEMU_IO_MEM_COMM;
        } else {
            kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
        }
        break;
    }
#ifdef _WIN32
    {
        DWORD temp;
        ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM, 
                              kphys_mem, sizeof(*kphys_mem),
                              NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    }
#else
    ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
#endif
    if (ret < 0) {
        /* name the request that was actually issued; start_addr is the
           page-aligned value at this point */
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_MEM error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                ret, start_addr, 
                (unsigned long)size, (unsigned long)phys_offset);
    }
}
367

    
368
/*
 * Memory image exchanged with the host FPU by the fsave/frstor helpers
 * in this file. The dummyN members only pad each 16-bit field to 32 bits.
 */
struct fpstate {
    uint16_t fpuc;              /* control word */
    uint16_t dummy1;
    uint16_t fpus;              /* status word, stack top in bits 11-13 */
    uint16_t dummy2;
    uint16_t fptag;             /* tag word, two bits per register */
    uint16_t dummy3;

    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];    /* eight registers, 10 bytes each */
};
382

    
383
/*
 * Memory image exchanged with the host FPU by the fxsave/fxrstor helpers
 * in this file.
 */
struct fpxstate {
    uint16_t fpuc;              /* control word */
    uint16_t fpus;              /* status word, stack top in bits 11-13 */
    uint16_t fptag;             /* one bit per register in this format */
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];    /* eight 16-byte slots, 10 bytes used */
    uint8_t xmm_regs[16 * 16];  /* sixteen 16-byte slots */
    uint8_t dummy2[96];
};

/* Single 16-byte aligned buffer shared by the fxsave/fxrstor helpers. */
static struct fpxstate fpx1 __attribute__((aligned(16)));
402

    
403
/*
 * Load the emulated x87 state from env into the host FPU with frstor.
 *
 * The image is built on the stack: control word, status word with the
 * stack top folded into bits 11-13, a tag word, and the eight registers
 * in stack order starting at env->fpstt.
 */
static void restore_native_fp_frstor(CPUState *env)
{
    int fptag, i, j;
    struct fpstate fp1, *fp = &fp1;

    /* start from a fully defined image so that no indeterminate stack
       bytes are loaded into the fields this function does not set */
    memset(fp, 0, sizeof(*fp));

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i=7; i>=0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* the FPU automatically computes it */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    /* frstor READS the image: it must be an input operand, otherwise the
       compiler is free to discard the stores made above */
    asm volatile ("frstor %0" : : "m" (*fp));
}
427

    
428
/*
 * Store the host x87 state with fsave and copy it back into env:
 * control word, status word (stack top extracted into env->fpstt), the
 * per-register tags and the eight registers. The host control word is
 * then reloaded with 0x037f plus the rounding bits (10-11) of the saved
 * control word.
 */
static void save_native_fp_fsave(CPUState *env)
{
    int fptag, i, j;
    uint16_t fpuc;
    struct fpstate fp1, *fp = &fp1;

    /* fsave WRITES the image: it must be an output operand so that the
       compiler knows *fp is defined by the instruction */
    asm volatile ("fsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag;
    for(i = 0;i < 8; i++) {
        /* only the tag value 3 is recorded as a set tag in env */
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
452

    
453
/*
 * Load the emulated x87 (and, when CPUID_SSE is set, SSE) state from env
 * into the host FPU with fxrstor, using the shared fpx1 buffer.
 *
 * The tag word of this format holds one bit per register with the
 * opposite polarity of env->fptags, hence the final XOR with 0xff.
 */
static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for(i = 0; i < 8; i++)
        fptag |= (env->fptags[i] << i);
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
    /* fxrstor READS the image: it must be an input operand, otherwise
       the compiler may treat the stores above as dead */
    asm volatile ("fxrstor %0" : : "m" (*fp));
}
478

    
479
/*
 * Store the host x87 (and, when CPUID_SSE is set, SSE) state with fxsave
 * into the shared fpx1 buffer and copy it back into env. The host FPU is
 * then reinitialised and its control word reloaded with 0x037f plus the
 * rounding bits (10-11) of the saved control word.
 */
static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

    /* fxsave WRITES the image: it must be an output operand so that the
       compiler does not reuse stale values of *fp */
    asm volatile ("fxsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    /* one bit per register, inverted relative to env->fptags */
    fptag = fp->fptag ^ 0xff;
    for(i = 0;i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    asm volatile ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
508

    
509
/*
 * Emulate the effect of a SYSCALL instruction reported by kqemu.
 *
 * The CS selector comes from bits 32-47 of env->star and SS is CS + 8.
 * With TARGET_X86_64 and long mode active, the return address goes to
 * ECX, the flags to register 11, the flags are masked with env->fmask
 * and execution continues at lstar (64-bit code) or cstar. Otherwise the
 * return address is truncated to 32 bits, IF/RF/VM are cleared and
 * execution continues at the low 32 bits of env->star.
 *
 * Always returns 2.
 */
static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    const int selector = (env->star >> 32) & 0xffff;
    const int cs_sel = selector & 0xfffc;
    const int ss_sel = (selector + 8) & 0xfffc;

#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int code64;

        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        /* sample the 64-bit code flag before CS is reloaded */
        code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, cs_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, ss_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~env->fmask;
        env->eip = code64 ? env->lstar : env->cstar;
    } else
#endif
    {
        env->regs[R_ECX] = (uint32_t)kenv->next_eip;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, cs_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
        cpu_x86_load_seg_cache(env, R_SS, ss_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
        env->eip = (uint32_t)env->star;
    }
    return 2;
}
561

    
562
#ifdef CONFIG_PROFILER
563

    
564
/* Program counters are divided by PC_REC_SIZE before being hashed into a
   table of 2^PC_REC_HASH_BITS buckets. */
#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* One profiled program counter and the number of times it was recorded;
   records sharing a bucket are chained through next. */
typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

/* Bucket heads and the total number of records currently allocated. */
static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;
576

    
577
static void kqemu_record_pc(unsigned long pc)
578
{
579
    unsigned long h;
580
    PCRecord **pr, *r;
581

    
582
    h = pc / PC_REC_SIZE;
583
    h = h ^ (h >> PC_REC_HASH_BITS);
584
    h &= (PC_REC_HASH_SIZE - 1);
585
    pr = &pc_rec_hash[h];
586
    for(;;) {
587
        r = *pr;
588
        if (r == NULL)
589
            break;
590
        if (r->pc == pc) {
591
            r->count++;
592
            return;
593
        }
594
        pr = &r->next;
595
    }
596
    r = malloc(sizeof(PCRecord));
597
    r->count = 1;
598
    r->pc = pc;
599
    r->next = NULL;
600
    *pr = r;
601
    nb_pc_records++;
602
}
603

    
604
static int pc_rec_cmp(const void *p1, const void *p2)
605
{
606
    PCRecord *r1 = *(PCRecord **)p1;
607
    PCRecord *r2 = *(PCRecord **)p2;
608
    if (r1->count < r2->count)
609
        return 1;
610
    else if (r1->count == r2->count)
611
        return 0;
612
    else
613
        return -1;
614
}
615

    
616
static void kqemu_record_flush(void)
617
{
618
    PCRecord *r, *r_next;
619
    int h;
620

    
621
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
622
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
623
            r_next = r->next;
624
            free(r);
625
        }
626
        pc_rec_hash[h] = NULL;
627
    }
628
    nb_pc_records = 0;
629
}
630

    
631
void kqemu_record_dump(void)
632
{
633
    PCRecord **pr, *r;
634
    int i, h;
635
    FILE *f;
636
    int64_t total, sum;
637

    
638
    pr = malloc(sizeof(PCRecord *) * nb_pc_records);
639
    i = 0;
640
    total = 0;
641
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
642
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
643
            pr[i++] = r;
644
            total += r->count;
645
        }
646
    }
647
    qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
648

    
649
    f = fopen("/tmp/kqemu.stats", "w");
650
    if (!f) {
651
        perror("/tmp/kqemu.stats");
652
        exit(1);
653
    }
654
    fprintf(f, "total: %" PRId64 "\n", total);
655
    sum = 0;
656
    for(i = 0; i < nb_pc_records; i++) {
657
        r = pr[i];
658
        sum += r->count;
659
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
660
                r->pc,
661
                r->count,
662
                (double)r->count / (double)total * 100.0,
663
                (double)sum / (double)total * 100.0);
664
    }
665
    fclose(f);
666
    free(pr);
667

    
668
    kqemu_record_flush();
669
}
670
#endif
671

    
672
static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
673
                                  const SegmentCache *sc)
674
{
675
    ksc->selector = sc->selector;
676
    ksc->flags = sc->flags;
677
    ksc->limit = sc->limit;
678
    ksc->base = sc->base;
679
}
680

    
681
/* Copy a kqemu segment cache entry (base, limit, flags, selector) back
   into the QEMU representation. */
static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->base = ksc->base;
    sc->limit = ksc->limit;
    sc->flags = ksc->flags;
    sc->selector = ksc->selector;
}
689

    
690
/*
 * Run guest code through the kqemu kernel module.
 *
 * The CPU state in env is copied into a kqemu_cpu_state, the emulated
 * FPU state is loaded into the host FPU, KQEMU_EXEC is issued, and the
 * resulting state is copied back into env. Pending page flushes, dirty
 * updates and modified-page invalidations reported by the module are
 * then applied and the hidden flags are recomputed.
 *
 * Return value, by kqemu result code:
 *   KQEMU_RET_SYSCALL   -> value of do_syscall() (2)
 *   KQEMU_RET_INT       -> 1, software interrupt recorded in env
 *   KQEMU_RET_EXCEPTION -> 1, exception recorded in env
 *   KQEMU_RET_INTR      -> 0
 *   KQEMU_RET_SOFTMMU   -> 2
 * Any other result dumps the CPU state and exits the process.
 */
int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, cpl, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    LOG_INT("kqemu: cpu_exec: enter\n");
    LOG_INT_STATE(env);

    /* export the CPU state to the kqemu structure */
    for(i = 0; i < CPU_NB_REGS; i++)
        kenv->regs[i] = env->regs[i];
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for(i = 0; i < 6; i++)
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    /* the debug address registers are only passed when the low byte of
       DR7 is non-zero */
    if (env->dr[7] & 0xff) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    cpl = (env->hflags & HF_CPL_MASK);
    kenv->cpl = cpl;
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    kqemu_reset_modified_ram_pages();

    if (env->cpuid_features & CPUID_FXSR)
        restore_native_fp_fxrstor(env);
    else
        restore_native_fp_frstor(env);

#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    /* kcpu_state lives on the stack and retval is not set anywhere
       above, so preset it to the failure value the Windows path uses.
       If the request fails before the module writes the state back,
       ret is then -1 instead of indeterminate stack data. */
    kenv->retval = -1;
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif
    if (env->cpuid_features & CPUID_FXSR)
        save_native_fp_fxsave(env);
    else
        save_native_fp_fsave(env);

    /* import the CPU state produced by kqemu */
    for(i = 0; i < CPU_NB_REGS; i++)
        env->regs[i] = kenv->regs[i];
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for(i = 0; i < 6; i++)
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for(i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    /* invalidate the translated code of every page kqemu reports as
       modified */
    if (kenv->nb_modified_ram_pages > 0) {
        for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
            unsigned long addr;
            addr = modified_ram_pages[i];
            tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
        }
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that in real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK)
        env->hflags |= HF_OSFXSR_MASK;
    else
        env->hflags &= ~HF_OSFXSR_MASK;

    LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    } else
    if ((ret & 0xff00) == KQEMU_RET_INT) {
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
        LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
        LOG_INT_STATE(env);
        return 1;
    } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
        LOG_INT("kqemu: exception v=%02x e=%04x:\n",
                    env->exception_index, env->error_code);
        LOG_INT_STATE(env);
        return 1;
    } else if (ret == KQEMU_RET_INTR) {
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
        LOG_INT_STATE(env);
        return 0;
    } else if (ret == KQEMU_RET_SOFTMMU) {
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
        LOG_INT_STATE(env);
        return 2;
    } else {
        cpu_dump_state(env, stderr, fprintf, 0);
        fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
        exit(1);
    }
    return 0;
}
914

    
915
/*
 * Ask a running kqemu execution to stop. Only Windows builds do any
 * work here; on other hosts the function body is empty. env is not used.
 */
void kqemu_cpu_interrupt(CPUState *env)
{
#ifdef _WIN32
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and successfully returning. */
    CancelIo(kqemu_fd);
#endif
}
923

    
924
/* 
925
   QEMU paravirtualization interface. The current interface only
926
   allows modifying the IF and IOPL flags when running in
927
   kqemu.
928

929
   At this point it is not very satisfactory. I leave it for reference
930
   as it adds little complexity.
931
*/
932

    
933
#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
934

    
935
/* Byte reads from the QPI communication page always return 0. */
static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    (void)opaque;
    (void)addr;
    return 0;
}
939

    
940
/* 16-bit reads from the QPI communication page always return 0. */
static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    (void)opaque;
    (void)addr;
    return 0;
}
944

    
945
/* Byte writes to the QPI communication page are ignored. */
static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}
948

    
949
/* 16-bit writes to the QPI communication page are ignored. */
static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}
952

    
953
/*
 * 32-bit read from the QPI communication page: returns the IF and IOPL
 * bits of the current CPU's eflags, or 0 when cpu_single_env is NULL.
 * opaque and addr are not used.
 */
static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *cpu = cpu_single_env;

    if (cpu == NULL) {
        return 0;
    }
    return cpu->eflags & (IF_MASK | IOPL_MASK);
}
962

    
963
/* Note: after writing to this address, the guest code must make sure
964
   it is exiting the current TB. pushf/popf can be used for that
965
   purpose. */
966
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
967
{
968
    CPUState *env;
969

    
970
    env = cpu_single_env;
971
    if (!env)
972
        return;
973
    env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) | 
974
        (val & (IF_MASK | IOPL_MASK));
975
}
976

    
977
static CPUReadMemoryFunc *qpi_mem_read[3] = {
978
    qpi_mem_readb,
979
    qpi_mem_readw,
980
    qpi_mem_readl,
981
};
982

    
983
static CPUWriteMemoryFunc *qpi_mem_write[3] = {
984
    qpi_mem_writeb,
985
    qpi_mem_writew,
986
    qpi_mem_writel,
987
};
988

    
989
static void qpi_init(void)
990
{
991
    kqemu_comm_base = 0xff000000 | 1;
992
    qpi_io_memory = cpu_register_io_memory(
993
                                           qpi_mem_read, 
994
                                           qpi_mem_write, NULL);
995
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff, 
996
                                 0x1000, qpi_io_memory);
997
}
998
#endif