Statistics
| Branch: | Revision:

root / cpus.c @ f0da3757

History | View | Annotate | Download (33.1 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor.h"
29
#include "sysemu.h"
30
#include "gdbstub.h"
31
#include "dma.h"
32
#include "kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu-thread.h"
36
#include "cpus.h"
37
#include "qtest.h"
38
#include "main-loop.h"
39
#include "bitmap.h"
40

    
41
#ifndef _WIN32
42
#include "compatfd.h"
43
#endif
44

    
45
#ifdef CONFIG_LINUX
46

    
47
#include <sys/prctl.h>
48

    
49
#ifndef PR_MCE_KILL
50
#define PR_MCE_KILL 33
51
#endif
52

    
53
#ifndef PR_MCE_KILL_SET
54
#define PR_MCE_KILL_SET 1
55
#endif
56

    
57
#ifndef PR_MCE_KILL_EARLY
58
#define PR_MCE_KILL_EARLY 1
59
#endif
60

    
61
#endif /* CONFIG_LINUX */
62

    
63
static CPUArchState *next_cpu;
64

    
65
/*
 * Decide whether the thread driving @env currently has nothing to do.
 *
 * A pending stop request or a queued work item always counts as busy.
 * Otherwise a stopped CPU, or any CPU while the VM is not running, is
 * idle.  A running CPU is idle only when it is halted, qemu_cpu_has_work()
 * reports no work, and kvm_async_interrupts_enabled() is false.
 */
static bool cpu_thread_is_idle(CPUArchState *env)
{
    bool needs_service = env->stop || env->queued_work_first;

    if (needs_service) {
        return false;
    }
    if (env->stopped || !runstate_is_running()) {
        return true;
    }
    return env->halted && !qemu_cpu_has_work(env) &&
           !kvm_async_interrupts_enabled();
}
79

    
80
static bool all_cpu_threads_idle(void)
81
{
82
    CPUArchState *env;
83

    
84
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
85
        if (!cpu_thread_is_idle(env)) {
86
            return false;
87
        }
88
    }
89
    return true;
90
}
91

    
92
/***********************************************************/
93
/* guest cycle counter */
94

    
95
/* Conversion factor from emulated instructions to virtual clock ticks.  */
96
static int icount_time_shift;
97
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
98
#define MAX_ICOUNT_SHIFT 10
99
/* Compensate for varying guest execution speed.  */
100
static int64_t qemu_icount_bias;
101
static QEMUTimer *icount_rt_timer;
102
static QEMUTimer *icount_vm_timer;
103
static QEMUTimer *icount_warp_timer;
104
static int64_t vm_clock_warp_start;
105
static int64_t qemu_icount;
106

    
107
typedef struct TimersState {
108
    int64_t cpu_ticks_prev;
109
    int64_t cpu_ticks_offset;
110
    int64_t cpu_clock_offset;
111
    int32_t cpu_ticks_enabled;
112
    int64_t dummy;
113
} TimersState;
114

    
115
TimersState timers_state;
116

    
117
/* Return the virtual CPU time, based on the instruction counter.  */
118
int64_t cpu_get_icount(void)
119
{
120
    int64_t icount;
121
    CPUArchState *env = cpu_single_env;
122

    
123
    icount = qemu_icount;
124
    if (env) {
125
        if (!can_do_io(env)) {
126
            fprintf(stderr, "Bad clock read\n");
127
        }
128
        icount -= (env->icount_decr.u16.low + env->icount_extra);
129
    }
130
    return qemu_icount_bias + (icount << icount_time_shift);
131
}
132

    
133
/* return the host CPU cycle counter and handle stop/restart */
134
int64_t cpu_get_ticks(void)
135
{
136
    if (use_icount) {
137
        return cpu_get_icount();
138
    }
139
    if (!timers_state.cpu_ticks_enabled) {
140
        return timers_state.cpu_ticks_offset;
141
    } else {
142
        int64_t ticks;
143
        ticks = cpu_get_real_ticks();
144
        if (timers_state.cpu_ticks_prev > ticks) {
145
            /* Note: non increasing ticks may happen if the host uses
146
               software suspend */
147
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
148
        }
149
        timers_state.cpu_ticks_prev = ticks;
150
        return ticks + timers_state.cpu_ticks_offset;
151
    }
152
}
153

    
154
/* return the host CPU monotonic timer and handle stop/restart */
155
int64_t cpu_get_clock(void)
156
{
157
    int64_t ti;
158
    if (!timers_state.cpu_ticks_enabled) {
159
        return timers_state.cpu_clock_offset;
160
    } else {
161
        ti = get_clock();
162
        return ti + timers_state.cpu_clock_offset;
163
    }
164
}
165

    
166
/* enable cpu_get_ticks() */
167
void cpu_enable_ticks(void)
168
{
169
    if (!timers_state.cpu_ticks_enabled) {
170
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
171
        timers_state.cpu_clock_offset -= get_clock();
172
        timers_state.cpu_ticks_enabled = 1;
173
    }
174
}
175

    
176
/* disable cpu_get_ticks() : the clock is stopped. You must not call
177
   cpu_get_ticks() after that.  */
178
void cpu_disable_ticks(void)
179
{
180
    if (timers_state.cpu_ticks_enabled) {
181
        timers_state.cpu_ticks_offset = cpu_get_ticks();
182
        timers_state.cpu_clock_offset = cpu_get_clock();
183
        timers_state.cpu_ticks_enabled = 0;
184
    }
185
}
186

    
187
/* Correlation between real and virtual time is always going to be
188
   fairly approximate, so ignore small variation.
189
   When the guest is idle real and virtual time will be aligned in
190
   the IO wait loop.  */
191
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
192

    
193
static void icount_adjust(void)
194
{
195
    int64_t cur_time;
196
    int64_t cur_icount;
197
    int64_t delta;
198
    static int64_t last_delta;
199
    /* If the VM is not running, then do nothing.  */
200
    if (!runstate_is_running()) {
201
        return;
202
    }
203
    cur_time = cpu_get_clock();
204
    cur_icount = qemu_get_clock_ns(vm_clock);
205
    delta = cur_icount - cur_time;
206
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
207
    if (delta > 0
208
        && last_delta + ICOUNT_WOBBLE < delta * 2
209
        && icount_time_shift > 0) {
210
        /* The guest is getting too far ahead.  Slow time down.  */
211
        icount_time_shift--;
212
    }
213
    if (delta < 0
214
        && last_delta - ICOUNT_WOBBLE > delta * 2
215
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
216
        /* The guest is getting too far behind.  Speed time up.  */
217
        icount_time_shift++;
218
    }
219
    last_delta = delta;
220
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
221
}
222

    
223
static void icount_adjust_rt(void *opaque)
224
{
225
    qemu_mod_timer(icount_rt_timer,
226
                   qemu_get_clock_ms(rt_clock) + 1000);
227
    icount_adjust();
228
}
229

    
230
static void icount_adjust_vm(void *opaque)
231
{
232
    qemu_mod_timer(icount_vm_timer,
233
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
234
    icount_adjust();
235
}
236

    
237
static int64_t qemu_icount_round(int64_t count)
238
{
239
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
240
}
241

    
242
static void icount_warp_rt(void *opaque)
243
{
244
    if (vm_clock_warp_start == -1) {
245
        return;
246
    }
247

    
248
    if (runstate_is_running()) {
249
        int64_t clock = qemu_get_clock_ns(rt_clock);
250
        int64_t warp_delta = clock - vm_clock_warp_start;
251
        if (use_icount == 1) {
252
            qemu_icount_bias += warp_delta;
253
        } else {
254
            /*
255
             * In adaptive mode, do not let the vm_clock run too
256
             * far ahead of real time.
257
             */
258
            int64_t cur_time = cpu_get_clock();
259
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
260
            int64_t delta = cur_time - cur_icount;
261
            qemu_icount_bias += MIN(warp_delta, delta);
262
        }
263
        if (qemu_clock_expired(vm_clock)) {
264
            qemu_notify_event();
265
        }
266
    }
267
    vm_clock_warp_start = -1;
268
}
269

    
270
void qtest_clock_warp(int64_t dest)
271
{
272
    int64_t clock = qemu_get_clock_ns(vm_clock);
273
    assert(qtest_enabled());
274
    while (clock < dest) {
275
        int64_t deadline = qemu_clock_deadline(vm_clock);
276
        int64_t warp = MIN(dest - clock, deadline);
277
        qemu_icount_bias += warp;
278
        qemu_run_timers(vm_clock);
279
        clock = qemu_get_clock_ns(vm_clock);
280
    }
281
    qemu_notify_event();
282
}
283

    
284
/*
 * Arrange for vm_clock to keep advancing while all CPUs are idle in
 * icount mode.
 *
 * The warp logic relies on too many globals to apply to other clocks; the
 * @clock argument only saves callers an if statement, and anything but
 * vm_clock (or a non-icount run) returns immediately.
 */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t until_timer;

    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance vm_clock now.  This makes
     * the deadline computed below correct and synchronizes the insn
     * counter before a CPU starts running, in case the CPU is woken by
     * an event other than the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);

    if (!(all_cpu_threads_idle() && qemu_clock_has_timers(vm_clock))) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    /* When testing, qtest commands advance icount. */
    if (qtest_enabled()) {
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    until_timer = qemu_clock_deadline(vm_clock);
    if (until_timer <= 0) {
        qemu_notify_event();
        return;
    }

    /*
     * Ensure vm_clock proceeds even when the virtual CPU goes to sleep.
     * Otherwise the CPU might wait for a future timer interrupt that
     * never comes, because a vCPU that runs no insns does not advance
     * vm_clock.
     *
     * An extreme solution would be to never let VCPUs sleep in icount
     * mode while a vm_clock timer is pending and simply jump time to the
     * next event.  Instead the VCPUs do stop, and vm_clock is advanced
     * only after some "real" time (related to the time left until the
     * next event) has passed; this rt_clock timer does that.  It keeps
     * warps from being too visible externally -- for example, network
     * packets are not sent continuously instead of every 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + until_timer);
}
339

    
340
static const VMStateDescription vmstate_timers = {
341
    .name = "timer",
342
    .version_id = 2,
343
    .minimum_version_id = 1,
344
    .minimum_version_id_old = 1,
345
    .fields      = (VMStateField[]) {
346
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
347
        VMSTATE_INT64(dummy, TimersState),
348
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
349
        VMSTATE_END_OF_LIST()
350
    }
351
};
352

    
353
void configure_icount(const char *option)
354
{
355
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
356
    if (!option) {
357
        return;
358
    }
359

    
360
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
361
    if (strcmp(option, "auto") != 0) {
362
        icount_time_shift = strtol(option, NULL, 0);
363
        use_icount = 1;
364
        return;
365
    }
366

    
367
    use_icount = 2;
368

    
369
    /* 125MIPS seems a reasonable initial guess at the guest speed.
370
       It will be corrected fairly quickly anyway.  */
371
    icount_time_shift = 3;
372

    
373
    /* Have both realtime and virtual time triggers for speed adjustment.
374
       The realtime trigger catches emulated time passing too slowly,
375
       the virtual time trigger catches emulated time passing too fast.
376
       Realtime triggers occur even when idle, so use them less frequently
377
       than VM triggers.  */
378
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
379
    qemu_mod_timer(icount_rt_timer,
380
                   qemu_get_clock_ms(rt_clock) + 1000);
381
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
382
    qemu_mod_timer(icount_vm_timer,
383
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
384
}
385

    
386
/***********************************************************/
387
void hw_error(const char *fmt, ...)
388
{
389
    va_list ap;
390
    CPUArchState *env;
391

    
392
    va_start(ap, fmt);
393
    fprintf(stderr, "qemu: hardware error: ");
394
    vfprintf(stderr, fmt, ap);
395
    fprintf(stderr, "\n");
396
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
397
        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
398
#ifdef TARGET_I386
399
        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
400
#else
401
        cpu_dump_state(env, stderr, fprintf, 0);
402
#endif
403
    }
404
    va_end(ap);
405
    abort();
406
}
407

    
408
void cpu_synchronize_all_states(void)
409
{
410
    CPUArchState *cpu;
411

    
412
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
413
        cpu_synchronize_state(cpu);
414
    }
415
}
416

    
417
void cpu_synchronize_all_post_reset(void)
418
{
419
    CPUArchState *cpu;
420

    
421
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
422
        cpu_synchronize_post_reset(cpu);
423
    }
424
}
425

    
426
void cpu_synchronize_all_post_init(void)
427
{
428
    CPUArchState *cpu;
429

    
430
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
431
        cpu_synchronize_post_init(cpu);
432
    }
433
}
434

    
435
/*
 * Return 1 if @env is not executing guest code because either the VM as a
 * whole is not running or this CPU is marked stopped; 0 otherwise.
 */
int cpu_is_stopped(CPUArchState *env)
{
    if (!runstate_is_running()) {
        return 1;
    }
    return env->stopped ? 1 : 0;
}
439

    
440
/*
 * Stop a running VM and enter run state @state.
 *
 * In order: freeze the tick counters, pause all vCPUs, set the run state,
 * notify VM state listeners, drain and flush all block devices, and emit
 * the QEVENT_STOP monitor event.  Does nothing if the VM is not running.
 */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
452

    
453
/*
 * Return 1 if @env may execute guest code: no stop is requested, the CPU
 * is not stopped, and the VM is running.  Return 0 otherwise.
 */
static int cpu_can_run(CPUArchState *env)
{
    return !env->stop && !env->stopped && runstate_is_running();
}
463

    
464
/*
 * React to a guest debug exception on @env: tell the gdbstub which CPU
 * stopped, raise a system debug request, and mark the CPU stopped.
 */
static void cpu_handle_guest_debug(CPUArchState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();

    env->stopped = 1;
}
470

    
471
static void cpu_signal(int sig)
472
{
473
    if (cpu_single_env) {
474
        cpu_exit(cpu_single_env);
475
    }
476
    exit_request = 1;
477
}
478

    
479
#ifdef CONFIG_LINUX
480
static void sigbus_reraise(void)
481
{
482
    sigset_t set;
483
    struct sigaction action;
484

    
485
    memset(&action, 0, sizeof(action));
486
    action.sa_handler = SIG_DFL;
487
    if (!sigaction(SIGBUS, &action, NULL)) {
488
        raise(SIGBUS);
489
        sigemptyset(&set);
490
        sigaddset(&set, SIGBUS);
491
        sigprocmask(SIG_UNBLOCK, &set, NULL);
492
    }
493
    perror("Failed to re-raise SIGBUS!\n");
494
    abort();
495
}
496

    
497
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
498
                           void *ctx)
499
{
500
    if (kvm_on_sigbus(siginfo->ssi_code,
501
                      (void *)(intptr_t)siginfo->ssi_addr)) {
502
        sigbus_reraise();
503
    }
504
}
505

    
506
static void qemu_init_sigbus(void)
507
{
508
    struct sigaction action;
509

    
510
    memset(&action, 0, sizeof(action));
511
    action.sa_flags = SA_SIGINFO;
512
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
513
    sigaction(SIGBUS, &action, NULL);
514

    
515
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
516
}
517

    
518
/*
 * Linux: consume, without blocking, every pending SIG_IPI and SIGBUS of
 * the calling thread.
 *
 * Each SIGBUS is passed to kvm_on_sigbus_vcpu() for @env and re-raised if
 * that returns non-zero.  The loop repeats while sigpending() still
 * reports either signal.  Unexpected sigtimedwait()/sigpending() failures
 * are fatal (exit(1)).
 */
static void qemu_kvm_eat_signals(CPUArchState *env)
{
    struct timespec no_wait = { 0, 0 };
    sigset_t wanted;
    sigset_t pending;
    siginfo_t info;
    int rc;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    do {
        rc = sigtimedwait(&wanted, &info, &no_wait);
        if (rc == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (rc == SIGBUS &&
            kvm_on_sigbus_vcpu(env, info.si_code, info.si_addr)) {
            sigbus_reraise();
        }

        rc = sigpending(&pending);
        if (rc == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&pending, SIG_IPI) || sigismember(&pending, SIGBUS));
}
554

    
555
#else /* !CONFIG_LINUX */
556

    
557
static void qemu_init_sigbus(void)
558
{
559
}
560

    
561
static void qemu_kvm_eat_signals(CPUArchState *env)
562
{
563
}
564
#endif /* !CONFIG_LINUX */
565

    
566
#ifndef _WIN32
567
/* Signal handler that deliberately does nothing; @sig is ignored. */
static void dummy_signal(int sig)
{
    (void)sig;
}
570

    
571
/*
 * Set up signal handling for a KVM vCPU thread.
 *
 * Installs dummy_signal() for SIG_IPI, then passes the thread's current
 * signal mask with SIG_IPI and SIGBUS removed to kvm_set_signal_mask().
 * A failure there is fatal (message on stderr, exit(1)).
 */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    struct sigaction ipi_action;
    sigset_t kvm_mask;
    int err;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* With a NULL new set this only fetches the current mask. */
    pthread_sigmask(SIG_BLOCK, NULL, &kvm_mask);
    sigdelset(&kvm_mask, SIG_IPI);
    sigdelset(&kvm_mask, SIGBUS);

    err = kvm_set_signal_mask(env, &kvm_mask);
    if (err == 0) {
        return;
    }
    fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-err));
    exit(1);
}
590

    
591
static void qemu_tcg_init_cpu_signals(void)
592
{
593
    sigset_t set;
594
    struct sigaction sigact;
595

    
596
    memset(&sigact, 0, sizeof(sigact));
597
    sigact.sa_handler = cpu_signal;
598
    sigaction(SIG_IPI, &sigact, NULL);
599

    
600
    sigemptyset(&set);
601
    sigaddset(&set, SIG_IPI);
602
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
603
}
604

    
605
#else /* _WIN32 */
606
/* Win32 build: this variant simply aborts if it is ever called. */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    (void)env;
    abort();
}

/* Win32 build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
614
#endif /* _WIN32 */
615

    
616
QemuMutex qemu_global_mutex;
617
static QemuCond qemu_io_proceeded_cond;
618
static bool iothread_requesting_mutex;
619

    
620
static QemuThread io_thread;
621

    
622
static QemuThread *tcg_cpu_thread;
623
static QemuCond *tcg_halt_cond;
624

    
625
/* cpu creation */
626
static QemuCond qemu_cpu_cond;
627
/* system init */
628
static QemuCond qemu_pause_cond;
629
static QemuCond qemu_work_cond;
630

    
631
void qemu_init_cpu_loop(void)
632
{
633
    qemu_init_sigbus();
634
    qemu_cond_init(&qemu_cpu_cond);
635
    qemu_cond_init(&qemu_pause_cond);
636
    qemu_cond_init(&qemu_work_cond);
637
    qemu_cond_init(&qemu_io_proceeded_cond);
638
    qemu_mutex_init(&qemu_global_mutex);
639

    
640
    qemu_thread_get_self(&io_thread);
641
}
642

    
643
/*
 * Run @func(@data) in the context of the thread that owns @env and wait
 * for it to finish.
 *
 * If the caller already is that thread, @func is called directly.
 * Otherwise a work item living on this stack frame is appended to the
 * CPU's queue, the CPU is kicked, and the caller sleeps on qemu_work_cond
 * until the item is flagged done.  The wait uses qemu_global_mutex, so
 * the caller presumably must hold that mutex.  cpu_single_env is restored
 * after every wake-up.
 */
void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item item;
    CPUArchState *saved_env;

    if (qemu_cpu_is_self(env)) {
        func(data);
        return;
    }

    item.func = func;
    item.data = data;
    item.next = NULL;
    item.done = false;

    /* Append to the tail of the CPU's singly linked work queue. */
    if (env->queued_work_first) {
        env->queued_work_last->next = &item;
    } else {
        env->queued_work_first = &item;
    }
    env->queued_work_last = &item;

    qemu_cpu_kick(env);

    saved_env = cpu_single_env;
    while (!item.done) {
        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
671

    
672
/*
 * Execute and dequeue every work item queued on @env, flag each one done,
 * then wake all waiters on qemu_work_cond.  Returns immediately, without
 * broadcasting, when the queue is empty.
 */
static void flush_queued_work(CPUArchState *env)
{
    struct qemu_work_item *item = env->queued_work_first;

    if (item == NULL) {
        return;
    }

    do {
        /* Unlink before running so the queue head is always current. */
        env->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
        item = env->queued_work_first;
    } while (item != NULL);

    env->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
688

    
689
/*
 * Housekeeping shared by all vCPU wait paths: turn a pending stop request
 * into the stopped state (signalling qemu_pause_cond), run queued work,
 * and clear the thread_kicked flag.
 */
static void qemu_wait_io_event_common(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);

    if (env->stop) {
        env->stopped = 1;
        env->stop = 0;
        qemu_cond_signal(&qemu_pause_cond);
    }

    flush_queued_work(env);
    cpu->thread_kicked = false;
}
701

    
702
static void qemu_tcg_wait_io_event(void)
703
{
704
    CPUArchState *env;
705

    
706
    while (all_cpu_threads_idle()) {
707
       /* Start accounting real time to the virtual clock if the CPUs
708
          are idle.  */
709
        qemu_clock_warp(vm_clock);
710
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
711
    }
712

    
713
    while (iothread_requesting_mutex) {
714
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
715
    }
716

    
717
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
718
        qemu_wait_io_event_common(env);
719
    }
720
}
721

    
722
/*
 * Wait step of a KVM vCPU thread: sleep on the CPU's halt condition while
 * it is idle, then consume pending signals and perform the common
 * housekeeping.
 */
static void qemu_kvm_wait_io_event(CPUArchState *env)
{
    for (;;) {
        if (!cpu_thread_is_idle(env)) {
            break;
        }
        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(env);
    qemu_wait_io_event_common(env);
}
731

    
732
static void *qemu_kvm_cpu_thread_fn(void *arg)
733
{
734
    CPUArchState *env = arg;
735
    CPUState *cpu = ENV_GET_CPU(env);
736
    int r;
737

    
738
    qemu_mutex_lock(&qemu_global_mutex);
739
    qemu_thread_get_self(cpu->thread);
740
    env->thread_id = qemu_get_thread_id();
741
    cpu_single_env = env;
742

    
743
    r = kvm_init_vcpu(env);
744
    if (r < 0) {
745
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
746
        exit(1);
747
    }
748

    
749
    qemu_kvm_init_cpu_signals(env);
750

    
751
    /* signal CPU creation */
752
    env->created = 1;
753
    qemu_cond_signal(&qemu_cpu_cond);
754

    
755
    while (1) {
756
        if (cpu_can_run(env)) {
757
            r = kvm_cpu_exec(env);
758
            if (r == EXCP_DEBUG) {
759
                cpu_handle_guest_debug(env);
760
            }
761
        }
762
        qemu_kvm_wait_io_event(env);
763
    }
764

    
765
    return NULL;
766
}
767

    
768
static void *qemu_dummy_cpu_thread_fn(void *arg)
769
{
770
#ifdef _WIN32
771
    fprintf(stderr, "qtest is not supported under Windows\n");
772
    exit(1);
773
#else
774
    CPUArchState *env = arg;
775
    CPUState *cpu = ENV_GET_CPU(env);
776
    sigset_t waitset;
777
    int r;
778

    
779
    qemu_mutex_lock_iothread();
780
    qemu_thread_get_self(cpu->thread);
781
    env->thread_id = qemu_get_thread_id();
782

    
783
    sigemptyset(&waitset);
784
    sigaddset(&waitset, SIG_IPI);
785

    
786
    /* signal CPU creation */
787
    env->created = 1;
788
    qemu_cond_signal(&qemu_cpu_cond);
789

    
790
    cpu_single_env = env;
791
    while (1) {
792
        cpu_single_env = NULL;
793
        qemu_mutex_unlock_iothread();
794
        do {
795
            int sig;
796
            r = sigwait(&waitset, &sig);
797
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
798
        if (r == -1) {
799
            perror("sigwait");
800
            exit(1);
801
        }
802
        qemu_mutex_lock_iothread();
803
        cpu_single_env = env;
804
        qemu_wait_io_event_common(env);
805
    }
806

    
807
    return NULL;
808
#endif
809
}
810

    
811
static void tcg_exec_all(void);
812

    
813
static void *qemu_tcg_cpu_thread_fn(void *arg)
814
{
815
    CPUArchState *env = arg;
816
    CPUState *cpu = ENV_GET_CPU(env);
817

    
818
    qemu_tcg_init_cpu_signals();
819
    qemu_thread_get_self(cpu->thread);
820

    
821
    /* signal CPU creation */
822
    qemu_mutex_lock(&qemu_global_mutex);
823
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
824
        env->thread_id = qemu_get_thread_id();
825
        env->created = 1;
826
    }
827
    qemu_cond_signal(&qemu_cpu_cond);
828

    
829
    /* wait for initial kick-off after machine start */
830
    while (first_cpu->stopped) {
831
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
832

    
833
        /* process any pending work */
834
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
835
            qemu_wait_io_event_common(env);
836
        }
837
    }
838

    
839
    while (1) {
840
        tcg_exec_all();
841
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
842
            qemu_notify_event();
843
        }
844
        qemu_tcg_wait_io_event();
845
    }
846

    
847
    return NULL;
848
}
849

    
850
/*
 * Force the thread that runs @env out of guest execution.
 *
 * POSIX: sends SIG_IPI to the thread; a pthread_kill() failure is fatal.
 * The error message is terminated with a newline so it is not left
 * dangling on the terminal before exit(1).
 * Win32: unless called from that very thread, suspends it, runs
 * cpu_signal() on its behalf, and resumes it.
 */
static void qemu_cpu_kick_thread(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(env)) {
        SuspendThread(cpu->hThread);
        cpu_signal(0);
        ResumeThread(cpu->hThread);
    }
#endif
}
869

    
870
void qemu_cpu_kick(void *_env)
871
{
872
    CPUArchState *env = _env;
873
    CPUState *cpu = ENV_GET_CPU(env);
874

    
875
    qemu_cond_broadcast(env->halt_cond);
876
    if (!tcg_enabled() && !cpu->thread_kicked) {
877
        qemu_cpu_kick_thread(env);
878
        cpu->thread_kicked = true;
879
    }
880
}
881

    
882
void qemu_cpu_kick_self(void)
883
{
884
#ifndef _WIN32
885
    assert(cpu_single_env);
886
    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
887

    
888
    if (!cpu_single_cpu->thread_kicked) {
889
        qemu_cpu_kick_thread(cpu_single_env);
890
        cpu_single_cpu->thread_kicked = true;
891
    }
892
#else
893
    abort();
894
#endif
895
}
896

    
897
int qemu_cpu_is_self(void *_env)
898
{
899
    CPUArchState *env = _env;
900
    CPUState *cpu = ENV_GET_CPU(env);
901

    
902
    return qemu_thread_is_self(cpu->thread);
903
}
904

    
905
void qemu_mutex_lock_iothread(void)
906
{
907
    if (!tcg_enabled()) {
908
        qemu_mutex_lock(&qemu_global_mutex);
909
    } else {
910
        iothread_requesting_mutex = true;
911
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
912
            qemu_cpu_kick_thread(first_cpu);
913
            qemu_mutex_lock(&qemu_global_mutex);
914
        }
915
        iothread_requesting_mutex = false;
916
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
917
    }
918
}
919

    
920
void qemu_mutex_unlock_iothread(void)
921
{
922
    qemu_mutex_unlock(&qemu_global_mutex);
923
}
924

    
925
static int all_vcpus_paused(void)
926
{
927
    CPUArchState *penv = first_cpu;
928

    
929
    while (penv) {
930
        if (!penv->stopped) {
931
            return 0;
932
        }
933
        penv = penv->next_cpu;
934
    }
935

    
936
    return 1;
937
}
938

    
939
void pause_all_vcpus(void)
940
{
941
    CPUArchState *penv = first_cpu;
942

    
943
    qemu_clock_enable(vm_clock, false);
944
    while (penv) {
945
        penv->stop = 1;
946
        qemu_cpu_kick(penv);
947
        penv = penv->next_cpu;
948
    }
949

    
950
    if (!qemu_thread_is_self(&io_thread)) {
951
        cpu_stop_current();
952
        if (!kvm_enabled()) {
953
            while (penv) {
954
                penv->stop = 0;
955
                penv->stopped = 1;
956
                penv = penv->next_cpu;
957
            }
958
            return;
959
        }
960
    }
961

    
962
    while (!all_vcpus_paused()) {
963
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
964
        penv = first_cpu;
965
        while (penv) {
966
            qemu_cpu_kick(penv);
967
            penv = penv->next_cpu;
968
        }
969
    }
970
}
971

    
972
void resume_all_vcpus(void)
973
{
974
    CPUArchState *penv = first_cpu;
975

    
976
    qemu_clock_enable(vm_clock, true);
977
    while (penv) {
978
        penv->stop = 0;
979
        penv->stopped = 0;
980
        qemu_cpu_kick(penv);
981
        penv = penv->next_cpu;
982
    }
983
}
984

    
985
static void qemu_tcg_init_vcpu(void *_env)
986
{
987
    CPUArchState *env = _env;
988
    CPUState *cpu = ENV_GET_CPU(env);
989

    
990
    /* share a single thread for all cpus with TCG */
991
    if (!tcg_cpu_thread) {
992
        cpu->thread = g_malloc0(sizeof(QemuThread));
993
        env->halt_cond = g_malloc0(sizeof(QemuCond));
994
        qemu_cond_init(env->halt_cond);
995
        tcg_halt_cond = env->halt_cond;
996
        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, env,
997
                           QEMU_THREAD_JOINABLE);
998
#ifdef _WIN32
999
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
1000
#endif
1001
        while (env->created == 0) {
1002
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1003
        }
1004
        tcg_cpu_thread = cpu->thread;
1005
    } else {
1006
        cpu->thread = tcg_cpu_thread;
1007
        env->halt_cond = tcg_halt_cond;
1008
    }
1009
}
1010

    
1011
/*
 * Create the dedicated KVM thread for @env: allocate its thread object
 * and halt condition, start qemu_kvm_cpu_thread_fn(), and wait on
 * qemu_cpu_cond until the CPU is flagged created.
 */
static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);

    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);

    cpu->thread = g_malloc0(sizeof(QemuThread));
    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (!env->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1024

    
1025
/*
 * Create the dummy thread for @env: allocate its thread object and halt
 * condition, start qemu_dummy_cpu_thread_fn(), and wait on qemu_cpu_cond
 * until the CPU is flagged created.
 */
static void qemu_dummy_start_vcpu(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);

    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);

    cpu->thread = g_malloc0(sizeof(QemuThread));
    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (!env->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1038

    
1039
void qemu_init_vcpu(void *_env)
1040
{
1041
    CPUArchState *env = _env;
1042

    
1043
    env->nr_cores = smp_cores;
1044
    env->nr_threads = smp_threads;
1045
    env->stopped = 1;
1046
    if (kvm_enabled()) {
1047
        qemu_kvm_start_vcpu(env);
1048
    } else if (tcg_enabled()) {
1049
        qemu_tcg_init_vcpu(env);
1050
    } else {
1051
        qemu_dummy_start_vcpu(env);
1052
    }
1053
}
1054

    
1055
void cpu_stop_current(void)
1056
{
1057
    if (cpu_single_env) {
1058
        cpu_single_env->stop = 0;
1059
        cpu_single_env->stopped = 1;
1060
        cpu_exit(cpu_single_env);
1061
        qemu_cond_signal(&qemu_pause_cond);
1062
    }
1063
}
1064

    
1065
/*
 * Stop the VM and move it to run state @state.
 *
 * On the I/O thread the stop is performed directly via do_vm_stop().
 * On any other thread a stop request is queued and the current CPU is
 * stopped instead.
 */
void vm_stop(RunState state)
{
    if (qemu_thread_is_self(&io_thread)) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
1078

    
1079
/* does a state transition even if the VM is already stopped,
1080
   current state is forgotten forever */
1081
void vm_stop_force_state(RunState state)
1082
{
1083
    if (runstate_is_running()) {
1084
        vm_stop(state);
1085
    } else {
1086
        runstate_set(state);
1087
    }
1088
}
1089

    
1090
/*
 * Run one cpu_exec() slice on @env and return its result.
 *
 * When use_icount is set, an instruction budget computed from the
 * vm_clock deadline is handed to the CPU before the slice, and whatever
 * is still pending afterwards is taken back out of qemu_icount.
 * With CONFIG_PROFILER the time spent in cpu_exec() is added to
 * qemu_time.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    int excp;
#ifdef CONFIG_PROFILER
    int64_t started = profile_getclock();
#endif

    if (use_icount) {
        int64_t budget;
        int slice;

        /* Take back anything still outstanding on this CPU. */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        budget = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += budget;

        /* The low counter is capped at 0xffff; the rest goes to icount_extra. */
        if (budget > 0xffff) {
            slice = 0xffff;
        } else {
            slice = budget;
        }
        env->icount_decr.u16.low = slice;
        env->icount_extra = budget - slice;
    }

    excp = cpu_exec(env);

#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - started;
#endif

    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }

    return excp;
}
1127

    
1128
static void tcg_exec_all(void)
1129
{
1130
    int r;
1131

    
1132
    /* Account partial waits to the vm_clock.  */
1133
    qemu_clock_warp(vm_clock);
1134

    
1135
    if (next_cpu == NULL) {
1136
        next_cpu = first_cpu;
1137
    }
1138
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1139
        CPUArchState *env = next_cpu;
1140

    
1141
        qemu_clock_enable(vm_clock,
1142
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1143

    
1144
        if (cpu_can_run(env)) {
1145
            r = tcg_cpu_exec(env);
1146
            if (r == EXCP_DEBUG) {
1147
                cpu_handle_guest_debug(env);
1148
                break;
1149
            }
1150
        } else if (env->stop || env->stopped) {
1151
            break;
1152
        }
1153
    }
1154
    exit_request = 0;
1155
}
1156

    
1157
void set_numa_modes(void)
1158
{
1159
    CPUArchState *env;
1160
    int i;
1161

    
1162
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1163
        for (i = 0; i < nb_numa_nodes; i++) {
1164
            if (test_bit(env->cpu_index, node_cpumask[i])) {
1165
                env->numa_node = i;
1166
            }
1167
        }
1168
    }
1169
}
1170

    
1171
void set_cpu_log(const char *optarg)
1172
{
1173
    int mask;
1174
    const CPULogItem *item;
1175

    
1176
    mask = cpu_str_to_log_mask(optarg);
1177
    if (!mask) {
1178
        printf("Log items (comma separated):\n");
1179
        for (item = cpu_log_items; item->mask != 0; item++) {
1180
            printf("%-10s %s\n", item->name, item->help);
1181
        }
1182
        exit(1);
1183
    }
1184
    cpu_set_log(mask);
1185
}
1186

    
1187
/* Forward @optarg unchanged to cpu_set_log_filename(). */
void set_cpu_log_filename(const char *optarg)
{
    cpu_set_log_filename(optarg);
}
1191

    
1192
/*
 * Print the target's CPU model list to @f using @cpu_fprintf.
 *
 * Uses the cpu_list_id macro when the target defines it (passing
 * @optarg), falls back to the deprecated cpu_list macro, and prints
 * nothing when the target defines neither.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list_id
    cpu_list_id(f, cpu_fprintf, optarg);
#elif defined(cpu_list)
    /* deprecated */
    cpu_list(f, cpu_fprintf);
#endif
}
1201

    
1202
/*
 * Build the list of per-CPU information entries, one for each CPU
 * reachable from first_cpu, in list order.
 *
 * Each CPU's state is synchronised with cpu_synchronize_state() before
 * its index, halted flag, thread id and the target-specific program
 * counter field(s) are copied.  Only first_cpu is marked "current".
 *
 * Returns the head of the newly allocated list, or NULL when there are
 * no CPUs.  @errp is not written by this function.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        CpuInfoList *info;

        cpu_synchronize_state(env);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = env->cpu_index;
        info->value->current = (env == first_cpu);
        info->value->halted = env->halted;
        info->value->thread_id = env->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        /* Append through the tail link; info->next is already zeroed. */
        *tail = info;
        tail = &info->next;

        env = env->next_cpu;
    }

    return head;
}
1245

    
1246
/*
 * Save @size bytes of guest memory starting at address @addr, read
 * through cpu_memory_rw_debug() on the selected CPU, into @filename.
 *
 * @addr:      start address passed to cpu_memory_rw_debug()
 * @size:      number of bytes to save; must not be negative
 * @filename:  output file, opened with mode "wb"
 * @has_cpu:   whether @cpu_index was supplied; CPU 0 is used otherwise
 * @cpu_index: index of the CPU to read through
 * @errp:      receives an error when an argument is invalid, the file
 *             cannot be opened, a read fails or a write comes up short
 *
 * The data is copied in chunks of at most sizeof(buf) bytes.  On a read
 * or write failure the file is closed and keeps what was written so far.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    uint8_t buf[1024];

    /*
     * A negative size would be truncated to a huge unsigned chunk length
     * below and make fwrite() read far beyond buf, so reject it before
     * touching the file system.
     */
    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    if (!has_cpu) {
        cpu_index = 0;
    }

    for (env = first_cpu; env; env = env->next_cpu) {
        if (cpu_index == env->cpu_index) {
            break;
        }
    }

    if (env == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        /*
         * Do not dump buf when the read failed: it would contain stale
         * stack bytes rather than guest memory.
         * NOTE(review): the parameter label below follows the C argument
         * name; confirm it matches the name used in the QAPI schema.
         */
        if (cpu_memory_rw_debug(env, addr, buf, l, 0) != 0) {
            error_set(errp, QERR_INVALID_PARAMETER_VALUE, "addr",
                      "a readable guest address range");
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
1292

    
1293
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1294
                  Error **errp)
1295
{
1296
    FILE *f;
1297
    uint32_t l;
1298
    uint8_t buf[1024];
1299

    
1300
    f = fopen(filename, "wb");
1301
    if (!f) {
1302
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1303
        return;
1304
    }
1305

    
1306
    while (size != 0) {
1307
        l = sizeof(buf);
1308
        if (l > size)
1309
            l = size;
1310
        cpu_physical_memory_rw(addr, buf, l, 0);
1311
        if (fwrite(buf, 1, l, f) != l) {
1312
            error_set(errp, QERR_IO_ERROR);
1313
            goto exit;
1314
        }
1315
        addr += l;
1316
        size -= l;
1317
    }
1318

    
1319
exit:
1320
    fclose(f);
1321
}
1322

    
1323
/*
 * Inject an NMI into every CPU.
 *
 * On TARGET_I386 each CPU receives the NMI through apic_deliver_nmi()
 * when it has an apic_state, and through cpu_interrupt() with
 * CPU_INTERRUPT_NMI otherwise.  On every other target @errp is set to
 * QERR_UNSUPPORTED.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        if (env->apic_state) {
            apic_deliver_nmi(env->apic_state);
        } else {
            cpu_interrupt(env, CPU_INTERRUPT_NMI);
        }
        env = env->next_cpu;
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}