Statistics
| Branch: | Revision:

root / kqemu.c @ d12d51d5

History | View | Annotate | Download (28 kB)

1
/*
2
 *  KQEMU support
3
 *
4
 *  Copyright (c) 2005-2008 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
19
 */
20
#include "config.h"
21
#ifdef _WIN32
22
#define WIN32_LEAN_AND_MEAN
23
#include <windows.h>
24
#include <winioctl.h>
25
#else
26
#include <sys/types.h>
27
#include <sys/mman.h>
28
#include <sys/ioctl.h>
29
#endif
30
#ifdef HOST_SOLARIS
31
#include <sys/ioccom.h>
32
#endif
33
#include <stdlib.h>
34
#include <stdio.h>
35
#include <stdarg.h>
36
#include <string.h>
37
#include <errno.h>
38
#include <unistd.h>
39
#include <inttypes.h>
40

    
41
#include "cpu.h"
42
#include "exec-all.h"
43
#include "qemu-common.h"
44

    
45
#ifdef USE_KQEMU
46

    
47
#define DEBUG
48
//#define PROFILE
49

    
50

    
51
/*
 * Logging helpers. With DEBUG defined they write to 'logfile' only when
 * the CPU_LOG_INT bit is set in 'loglevel'; otherwise they expand to
 * empty statements.
 */
#ifndef DEBUG
#  define LOG_INT(...) do { } while (0)
#  define LOG_INT_STATE(env) do { } while (0)
#else
#  define LOG_INT(...)                          \
    do {                                        \
        if (loglevel & CPU_LOG_INT)             \
            fprintf(logfile, ## __VA_ARGS__);   \
    } while (0)
#  define LOG_INT_STATE(env)                            \
    do {                                                \
        if (loglevel & CPU_LOG_INT)                     \
            cpu_dump_state(env, logfile, fprintf, 0);   \
    } while (0)
#endif
65

    
66
#include <unistd.h>
67
#include <fcntl.h>
68
#include "kqemu.h"
69

    
70
/* Path of the kqemu device node opened by kqemu_init(). */
#ifndef _WIN32
#define KQEMU_DEVICE "/dev/kqemu"
#else
#define KQEMU_DEVICE "\\\\.\\kqemu"
#endif
75

    
76
static void qpi_init(void);
77

    
78
/*
 * Handle on the kqemu device plus the host specific "invalid" value and
 * close primitive. The handle starts out invalid.
 */
#ifndef _WIN32
#define KQEMU_INVALID_FD -1
int kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) close(x)
#else
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) CloseHandle(x)
#endif
87

    
88
/*
 * Acceleration policy:
 *   0 = not allowed
 *   1 = user kqemu
 *   2 = kernel kqemu
 */
int kqemu_allowed = 1;

/* list of pages to flush and number of valid entries in it */
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;

/* list of RAM pages whose dirty state must be updated, and its length */
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;

/* list of modified RAM pages, its length, and a per page "already
   listed" marker table */
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;
uint8_t *modified_ram_pages_table;

/* I/O memory index of the QPI region */
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
102

    
103
#define cpuid(index, eax, ebx, ecx, edx) \
104
  asm volatile ("cpuid" \
105
                : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
106
                : "0" (index))
107

    
108
/*
 * Return non zero if the host CPU implements CPUID.
 *
 * x86_64 hosts always report support. On 32 bit hosts the function flips
 * bit 21 (0x00200000) of EFLAGS and reports support if the flipped value
 * differs from the original when read back.
 */
static int is_cpuid_supported(void)
{
#ifdef __x86_64__
    return 1;
#else
    int flags_after, flags_before;

    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (flags_after), "=d" (flags_before)
                  :
                  : "cc");
    return (flags_after != flags_before);
#endif
}
131

    
132
/*
 * Make the user visible CPUID feature bits of the target CPU match the
 * host CPU. Only the bits selected by the two masks below are taken from
 * the host; all other target bits are left untouched.
 */
static void kqemu_update_cpuid(CPUState *env)
{
    int host_features = 0, host_ext_features = 0;
    int feature_mask, ext_feature_mask;
    uint32_t eax, ebx, ecx, edx;

    /* These features are kept identical on the host and target cpus
       because they are important for user code. Strictly speaking, only
       SSE really matters because the OS must support it if the user code
       uses it. */
    feature_mask = CPUID_CMOV | CPUID_CX8 |
                   CPUID_FXSR | CPUID_MMX | CPUID_SSE |
                   CPUID_SSE2 | CPUID_SEP;
    ext_feature_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;

    /* without CPUID the host is treated as having none of the features */
    if (is_cpuid_supported()) {
        cpuid(1, eax, ebx, ecx, edx);
        host_features = edx;
        host_ext_features = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in compatibility
       mode, so in order to have the best performances it is better not
       to use it */
    host_features &= ~CPUID_SEP;
#endif

    env->cpuid_features &= ~feature_mask;
    env->cpuid_features |= host_features & feature_mask;

    env->cpuid_ext_features &= ~ext_feature_mask;
    env->cpuid_ext_features |= host_ext_features & ext_feature_mask;

    /* XXX: we could update more of the target CPUID state so that the
       non accelerated code sees exactly the same CPU features as the
       accelerated code */
}
168

    
169
/*
 * Open the kqemu device, check the module version, allocate the page
 * lists shared with the module and issue KQEMU_INIT.
 *
 * Returns 0 on success. Returns -1 if kqemu is not allowed, the device
 * cannot be opened, the version does not match, an allocation fails or
 * KQEMU_INIT fails; in the last four cases the device is closed again and
 * kqemu_fd is reset to KQEMU_INVALID_FD.
 *
 * NOTE(review): buffers allocated before a later failure are not released
 * on the failure path.
 */
int kqemu_init(CPUState *env)
{
    struct kqemu_init kinit;
    int ret, version;
#ifdef _WIN32
    DWORD temp;
#endif

    if (!kqemu_allowed)
        return -1;

    /* open the device */
#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
                KQEMU_DEVICE, GetLastError());
        return -1;
    }
#else
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
        return -1;
    }
#endif

    /* 'version' stays 0 if the query fails, which is then reported as a
       mismatch below */
    version = 0;
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
#endif
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

    /* page lists whose addresses are passed to the module in 'kinit' */
    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
                                  sizeof(uint64_t));
    if (!pages_to_flush)
        goto fail;

    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
                                       sizeof(uint64_t));
    if (!ram_pages_to_update)
        goto fail;

    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
                                      sizeof(uint64_t));
    if (!modified_ram_pages)
        goto fail;

    /* one marker byte per target RAM page */
    modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
    if (!modified_ram_pages_table)
        goto fail;

    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    kinit.ram_base = phys_ram_base;
    kinit.ram_size = phys_ram_size;
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
#ifdef _WIN32
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
#endif
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
        goto fail;
    }

    kqemu_update_cpuid(env);
    env->kqemu_enabled = kqemu_allowed;
    nb_pages_to_flush = 0;
    nb_ram_pages_to_update = 0;

    qpi_init();
    return 0;

fail:
    kqemu_closefd(kqemu_fd);
    kqemu_fd = KQEMU_INVALID_FD;
    return -1;
}
257

    
258
/*
 * Queue 'addr' in pages_to_flush. Once the list already holds
 * KQEMU_MAX_PAGES_TO_FLUSH entries (or more), the counter is set to
 * KQEMU_FLUSH_ALL instead of storing the address.
 */
void kqemu_flush_page(CPUState *env, target_ulong addr)
{
    LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    if (nb_pages_to_flush < KQEMU_MAX_PAGES_TO_FLUSH) {
        pages_to_flush[nb_pages_to_flush] = addr;
        nb_pages_to_flush++;
        return;
    }
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
266

    
267
/*
 * Request a complete flush by setting the pending counter to
 * KQEMU_FLUSH_ALL. Neither 'env' nor 'global' is used.
 */
void kqemu_flush(CPUState *env, int global)
{
    LOG_INT("kqemu_flush:\n");

    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}
272

    
273
/*
 * Queue 'ram_addr' in ram_pages_to_update, but only if the dirty byte of
 * its page currently reads 0xff. When the list is full the counter is
 * set to KQEMU_RAM_PAGES_UPDATE_ALL instead.
 */
void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
    LOG_INT("kqemu_set_notdirty: addr=%08lx\n", 
                (unsigned long)ram_addr);

    /* we only track transitions to dirty state */
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] == 0xff) {
        if (nb_ram_pages_to_update < KQEMU_MAX_RAM_PAGES_TO_UPDATE) {
            ram_pages_to_update[nb_ram_pages_to_update] = ram_addr;
            nb_ram_pages_to_update++;
        } else {
            nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
        }
    }
}
285

    
286
static void kqemu_reset_modified_ram_pages(void)
287
{
288
    int i;
289
    unsigned long page_index;
290

    
291
    for(i = 0; i < nb_modified_ram_pages; i++) {
292
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
293
        modified_ram_pages_table[page_index] = 0;
294
    }
295
    nb_modified_ram_pages = 0;
296
}
297

    
298
/*
 * Record that the RAM page containing 'ram_addr' was modified.
 *
 * A page is listed at most once: the per page marker table is consulted
 * first. When the list reaches KQEMU_MAX_MODIFIED_RAM_PAGES entries it is
 * pushed to the kernel module with KQEMU_MODIFY_RAM_PAGES and then reset.
 * A failing request is reported on stderr; the list is reset regardless
 * so that later calls keep working.
 */
void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index;
    int ret;
#ifdef _WIN32
    DWORD temp;
#endif

    page_index = ram_addr >> TARGET_PAGE_BITS;
    if (modified_ram_pages_table[page_index])
        return; /* already listed */

    modified_ram_pages_table[page_index] = 1;
    modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
    if (nb_modified_ram_pages < KQEMU_MAX_MODIFIED_RAM_PAGES)
        return;

    /* list is full: flush it to the kernel module */
#ifdef _WIN32
    /* normalize the BOOL result to 0 / -1 like the other requests in
       this file */
    ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                          &nb_modified_ram_pages,
                          sizeof(nb_modified_ram_pages),
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                &nb_modified_ram_pages);
#endif
    if (ret < 0) {
        fprintf(stderr, "kqemu: KQEMU_MODIFY_RAM_PAGES error=%d: nb_modified_ram_pages=%u\n",
                ret, nb_modified_ram_pages);
    }
    kqemu_reset_modified_ram_pages();
}
328

    
329
/*
 * Describe a guest physical memory range to the kernel module with
 * KQEMU_SET_PHYS_MEM.
 *
 * The range is widened to whole target pages. The low bits of
 * 'phys_offset' (below the page mask) select the memory type: RAM, ROM,
 * the QPI communication region, or unassigned for anything else. A
 * failing request is reported on stderr.
 */
void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size, 
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem region;
    uint64_t range_end;
    int status, io_index;

    /* round the end up and the start down to page boundaries */
    range_end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;

    region.phys_addr = start_addr;
    region.size = range_end - start_addr;
    region.ram_addr = phys_offset & TARGET_PAGE_MASK;

    io_index = phys_offset & ~TARGET_PAGE_MASK;
    if (io_index == IO_MEM_RAM)
        region.io_index = KQEMU_IO_MEM_RAM;
    else if (io_index == IO_MEM_ROM)
        region.io_index = KQEMU_IO_MEM_ROM;
    else if (qpi_io_memory == io_index)
        region.io_index = KQEMU_IO_MEM_COMM;
    else
        region.io_index = KQEMU_IO_MEM_UNASSIGNED;

#ifdef _WIN32
    {
        DWORD temp;
        status = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM, 
                                 &region, sizeof(region),
                                 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    }
#else
    status = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, &region);
#endif
    if (status < 0) {
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                status, start_addr, 
                (unsigned long)size, (unsigned long)phys_offset);
    }
}
373

    
374
/* Memory image exchanged with the host FPU by the fsave / frstor
   helpers in this file. */
struct fpstate {
    /* control, status and tag words, each followed by 16 bits of padding */
    uint16_t fpuc;
    uint16_t dummy1;
    uint16_t fpus;
    uint16_t dummy2;
    uint16_t fptag;
    uint16_t dummy3;

    /* four 32 bit words that the helpers in this file never access */
    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;

    /* eight registers, 10 bytes each */
    uint8_t fpregs1[8 * 10];
};
388

    
389
/* Memory image exchanged with the host FPU by the fxsave / fxrstor
   helpers in this file. */
struct fpxstate {
    uint16_t fpuc;
    uint16_t fpus;
    uint16_t fptag;
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    /* eight x87 registers, one per 16 byte slot */
    uint8_t fpregs1[8 * 16];
    /* sixteen 16 byte XMM register slots */
    uint8_t xmm_regs[16 * 16];
    /* trailing padding */
    uint8_t dummy2[96];
};

/* single 16 byte aligned buffer shared by the fxsave / fxrstor helpers */
static struct fpxstate fpx1 __attribute__((aligned(16)));
408

    
409
/*
 * Load the emulated x87 state of 'env' into the host FPU with frstor.
 *
 * The image is built on the stack: control word, status word with the
 * top-of-stack index stored in bits 11-13, a tag word where every
 * register flagged in env->fptags[] gets tag value 3, and the eight
 * registers copied starting at the current top of stack.
 */
static void restore_native_fp_frstor(CPUState *env)
{
    int fptag, i, j;
    struct fpstate fp1, *fp = &fp1;

    /* fields that are not filled in below must not carry stack garbage
       into the FPU */
    memset(fp, 0, sizeof(*fp));

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i=7; i>=0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* the FPU automatically computes it */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    /* frstor only reads the image, so it has to be an input operand;
       declaring it as an output would let the compiler drop the stores
       above as dead */
    asm volatile ("frstor %0" : : "m" (*fp));
}
433

    
434
/*
 * Store the host FPU state with fsave and copy it into 'env': control
 * word, top-of-stack index (status bits 11-13), status word without
 * those bits, one flag per register that is set when its 2 bit tag
 * equals 3, and the eight registers starting at the top of stack.
 * Afterwards the host control word is reloaded with 0x037f combined with
 * the rounding bits (10-11) of the saved control word.
 */
static void save_native_fp_fsave(CPUState *env)
{
    int fptag, i, j;
    uint16_t fpuc;
    struct fpstate fp1, *fp = &fp1;

    /* fsave writes the image, so it has to be an output operand;
       otherwise the compiler may assume *fp is still uninitialized when
       it is read below */
    asm volatile ("fsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag;
    for(i = 0;i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
458

    
459
/*
 * Load the emulated x87 (and, when the target advertises SSE, the
 * MXCSR / XMM) state of 'env' into the host FPU with fxrstor, using the
 * shared aligned buffer fpx1.
 *
 * The tag word is the bitwise inverse of one bit per register taken from
 * env->fptags[]. Registers are copied starting at the top of stack, one
 * per 16 byte slot.
 */
static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for(i = 0; i < 8; i++)
        fptag |= (env->fptags[i] << i);
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
    /* fxrstor only reads the image, so it has to be an input operand;
       declaring it as an output would let the compiler drop the stores
       above as dead */
    asm volatile ("fxrstor %0" : : "m" (*fp));
}
484

    
485
/*
 * Store the host FPU state with fxsave into the shared buffer fpx1 and
 * copy it into 'env': control word, top-of-stack index, status word, one
 * flag per register derived from the inverted tag byte, the eight
 * registers, and MXCSR / XMM registers when the target advertises SSE.
 * Afterwards the host FPU is reinitialized and its control word reloaded
 * with 0x037f combined with the rounding bits (10-11) of the saved
 * control word.
 */
static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

    /* fxsave writes the image, so it has to be an output operand;
       otherwise the compiler may reuse stale values of *fp for the
       reads below */
    asm volatile ("fxsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag ^ 0xff;
    for(i = 0;i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for(i = 0;i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    asm volatile ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}
514

    
515
/*
 * Carry out the register and segment updates of a SYSCALL instruction on
 * 'env', using the return address reported by kqemu in kenv->next_eip.
 *
 * CS is loaded from bits 32-47 of env->star and SS from that selector
 * plus 8. In long mode (TARGET_X86_64 with HF_LMA_MASK set) the flags
 * are saved in register 11, masked with env->fmask, and execution
 * continues at lstar or cstar depending on whether the caller ran 64 bit
 * code; otherwise IF, RF and VM are cleared and execution continues at
 * the low 32 bits of env->star.
 *
 * Always returns 2.
 */
static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    int sel;

    sel = (env->star >> 32) & 0xffff;
#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int from_code64;

        /* save return address and flags */
        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        /* must be sampled before CS is reloaded below */
        from_code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, sel & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, (sel + 8) & 0xfffc,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_W_MASK | DESC_A_MASK);
        env->eflags &= ~env->fmask;
        env->eip = from_code64 ? env->lstar : env->cstar;
        return 2;
    }
#endif
    /* legacy mode */
    env->regs[R_ECX] = (uint32_t)kenv->next_eip;

    cpu_x86_set_cpl(env, 0);
    cpu_x86_load_seg_cache(env, R_CS, sel & 0xfffc,
                           0, 0xffffffff,
                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                           DESC_S_MASK |
                           DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
    cpu_x86_load_seg_cache(env, R_SS, (sel + 8) & 0xfffc,
                           0, 0xffffffff,
                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                           DESC_S_MASK |
                           DESC_W_MASK | DESC_A_MASK);
    env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
    env->eip = (uint32_t)env->star;
    return 2;
}
567

    
568
#ifdef CONFIG_PROFILER
569

    
570
/*
 * Program counter profiler: counts how often each PC is recorded, using
 * a chained hash table keyed by PC.
 */
#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* one counted PC; records with the same hash are chained through 'next' */
typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;

/*
 * Count one occurrence of 'pc'. A new record is appended to the end of
 * its hash chain the first time a PC is seen. If no memory is available
 * for a new record the sample is dropped.
 */
static void kqemu_record_pc(unsigned long pc)
{
    unsigned long h;
    PCRecord **pr, *r;

    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);
    pr = &pc_rec_hash[h];
    for(;;) {
        r = *pr;
        if (r == NULL)
            break;
        if (r->pc == pc) {
            r->count++;
            return;
        }
        pr = &r->next;
    }
    r = malloc(sizeof(PCRecord));
    if (r == NULL)
        return; /* profiling is best effort: drop the sample */
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}

/* qsort() comparator over PCRecord pointers: higher counts sort first */
static int pc_rec_cmp(const void *p1, const void *p2)
{
    const PCRecord *r1 = *(PCRecord * const *)p1;
    const PCRecord *r2 = *(PCRecord * const *)p2;
    if (r1->count < r2->count)
        return 1;
    else if (r1->count == r2->count)
        return 0;
    else
        return -1;
}

/* Free every record and leave the table empty. */
static void kqemu_record_flush(void)
{
    PCRecord *r, *r_next;
    int h;

    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
            r_next = r->next;
            free(r);
        }
        pc_rec_hash[h] = NULL;
    }
    nb_pc_records = 0;
}

/*
 * Write all records to /tmp/kqemu.stats, sorted by decreasing count,
 * each with its share and the cumulative share of the total, then flush
 * the table. Exits the process if the file cannot be opened. If the
 * temporary sort array cannot be allocated nothing is written and the
 * records are kept.
 */
void kqemu_record_dump(void)
{
    PCRecord **pr = NULL, *r;
    int i, h;
    FILE *f;
    int64_t total, sum;

    total = 0;
    if (nb_pc_records > 0) {
        pr = malloc(sizeof(PCRecord *) * nb_pc_records);
        if (pr == NULL)
            return;
        i = 0;
        for(h = 0; h < PC_REC_HASH_SIZE; h++) {
            for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
                pr[i++] = r;
                total += r->count;
            }
        }
        qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
    }

    f = fopen("/tmp/kqemu.stats", "w");
    if (!f) {
        perror("/tmp/kqemu.stats");
        exit(1);
    }
    fprintf(f, "total: %" PRId64 "\n", total);
    sum = 0;
    for(i = 0; i < nb_pc_records; i++) {
        r = pr[i];
        sum += r->count;
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
                r->pc,
                r->count,
                (double)r->count / (double)total * 100.0,
                (double)sum / (double)total * 100.0);
    }
    fclose(f);
    free(pr);

    kqemu_record_flush();
}
676
#endif
677

    
678
static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
679
                                  const SegmentCache *sc)
680
{
681
    ksc->selector = sc->selector;
682
    ksc->flags = sc->flags;
683
    ksc->limit = sc->limit;
684
    ksc->base = sc->base;
685
}
686

    
687
/* Copy a kqemu segment cache entry back into its QEMU counterpart, field
   by field. */
static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->base = ksc->base;
    sc->limit = ksc->limit;
    sc->flags = ksc->flags;
    sc->selector = ksc->selector;
}
695

    
696
/*
 * Run guest code through the kqemu kernel module.
 *
 * The CPU state of 'env' is copied into a kqemu_cpu_state, the FPU state
 * is loaded into the host FPU, KQEMU_EXEC is issued, and the resulting
 * state is copied back. Pending page flushes, dirty updates and modified
 * pages reported by the module are then applied and the hidden flags are
 * recomputed.
 *
 * Return value, derived from the module's result code:
 *   2  KQEMU_RET_SYSCALL (after do_syscall()) or KQEMU_RET_SOFTMMU
 *   1  KQEMU_RET_INT or KQEMU_RET_EXCEPTION; env->exception_* are set
 *   0  KQEMU_RET_INTR
 * Any other result code dumps the CPU state and exits the process.
 */
int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, cpl, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    LOG_INT("kqemu: cpu_exec: enter\n");
    LOG_INT_STATE(env);

    /* Not every field is assigned below (dr0-dr3 when no breakpoint is
       enabled, retval, paddings): start from a zeroed structure so that
       no stack garbage is passed to the module or read back from it. */
    memset(kenv, 0, sizeof(*kenv));

    for(i = 0; i < CPU_NB_REGS; i++)
        kenv->regs[i] = env->regs[i];
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for(i = 0; i < 6; i++)
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    /* debug address registers are only passed when the low byte of dr7
       is non zero */
    if (env->dr[7] & 0xff) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    cpl = (env->hflags & HF_CPL_MASK);
    kenv->cpl = cpl;
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    kqemu_reset_modified_ram_pages();

    if (env->cpuid_features & CPUID_FXSR)
        restore_native_fp_fxrstor(env);
    else
        restore_native_fp_frstor(env);

#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif
    if (env->cpuid_features & CPUID_FXSR)
        save_native_fp_fxsave(env);
    else
        save_native_fp_fsave(env);

    /* copy the state produced by the module back into 'env' */
    for(i = 0; i < CPU_NB_REGS; i++)
        env->regs[i] = kenv->regs[i];
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for(i = 0; i < 6; i++)
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for(i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    /* invalidate translated code on every page the module reports as
       modified */
    if (kenv->nb_modified_ram_pages > 0) {
        for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
            unsigned long addr;
            addr = modified_ram_pages[i];
            tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
        }
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that in real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK)
        env->hflags |= HF_OSFXSR_MASK;
    else
        env->hflags &= ~HF_OSFXSR_MASK;

    LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    } else
    if ((ret & 0xff00) == KQEMU_RET_INT) {
        /* the low byte of the result code is the vector number */
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
        LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
        LOG_INT_STATE(env);
        return 1;
    } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
        LOG_INT("kqemu: exception v=%02x e=%04x:\n",
                    env->exception_index, env->error_code);
        LOG_INT_STATE(env);
        return 1;
    } else if (ret == KQEMU_RET_INTR) {
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
        LOG_INT_STATE(env);
        return 0;
    } else if (ret == KQEMU_RET_SOFTMMU) {
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
        LOG_INT_STATE(env);
        return 2;
    } else {
        cpu_dump_state(env, stderr, fprintf, 0);
        fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
        exit(1);
    }
    return 0;
}
920

    
921
/*
 * Interrupt a running kqemu execution. Only Windows hosts need to do
 * something here; elsewhere the function is empty. 'env' is not used.
 */
void kqemu_cpu_interrupt(CPUState *env)
{
#ifdef _WIN32
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and successfully returning. */
    CancelIo(kqemu_fd);
#endif
}
929

    
930
/* 
931
   QEMU paravirtualization interface. The current interface only
932
   allows to modify the IF and IOPL flags when running in
933
   kqemu.
934

935
   At this point it is not very satisfactory. I leave it for reference
936
   as it adds little complexity.
937
*/
938

    
939
#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
940

    
941
/* Byte reads of the QPI region always yield 0. */
static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    (void)opaque;
    (void)addr;
    return 0;
}
945

    
946
/* 16 bit reads of the QPI region always yield 0. */
static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    (void)opaque;
    (void)addr;
    return 0;
}
950

    
951
/* Byte writes to the QPI region are ignored. */
static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}
954

    
955
/* 16 bit writes to the QPI region are ignored. */
static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}
958

    
959
/*
 * 32 bit reads of the QPI region return the IF and IOPL bits of the
 * current CPU's eflags, or 0 when there is no current CPU.
 */
static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *cpu = cpu_single_env;

    if (cpu)
        return cpu->eflags & (IF_MASK | IOPL_MASK);
    return 0;
}
968

    
969
/* Note: after writing to this address, the guest code must make sure
970
   it is exiting the current TB. pushf/popf can be used for that
971
   purpose. */
972
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
973
{
974
    CPUState *env;
975

    
976
    env = cpu_single_env;
977
    if (!env)
978
        return;
979
    env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) | 
980
        (val & (IF_MASK | IOPL_MASK));
981
}
982

    
983
static CPUReadMemoryFunc *qpi_mem_read[3] = {
984
    qpi_mem_readb,
985
    qpi_mem_readw,
986
    qpi_mem_readl,
987
};
988

    
989
static CPUWriteMemoryFunc *qpi_mem_write[3] = {
990
    qpi_mem_writeb,
991
    qpi_mem_writew,
992
    qpi_mem_writel,
993
};
994

    
995
static void qpi_init(void)
996
{
997
    kqemu_comm_base = 0xff000000 | 1;
998
    qpi_io_memory = cpu_register_io_memory(0, 
999
                                           qpi_mem_read, 
1000
                                           qpi_mem_write, NULL);
1001
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff, 
1002
                                 0x1000, qpi_io_memory);
1003
}
1004
#endif