Statistics
| Branch: | Revision:

root / cpus.c @ cb446eca

History | View | Annotate | Download (33.8 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor/monitor.h"
29
#include "sysemu/sysemu.h"
30
#include "exec/gdbstub.h"
31
#include "sysemu/dma.h"
32
#include "sysemu/kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu/thread.h"
36
#include "sysemu/cpus.h"
37
#include "sysemu/qtest.h"
38
#include "qemu/main-loop.h"
39
#include "qemu/bitmap.h"
40

    
41
#ifndef _WIN32
42
#include "qemu/compatfd.h"
43
#endif
44

    
45
#ifdef CONFIG_LINUX
46

    
47
#include <sys/prctl.h>
48

    
49
#ifndef PR_MCE_KILL
50
#define PR_MCE_KILL 33
51
#endif
52

    
53
#ifndef PR_MCE_KILL_SET
54
#define PR_MCE_KILL_SET 1
55
#endif
56

    
57
#ifndef PR_MCE_KILL_EARLY
58
#define PR_MCE_KILL_EARLY 1
59
#endif
60

    
61
#endif /* CONFIG_LINUX */
62

    
63
/* File-local CPU cursor.  NOTE(review): its users are outside the part of
   the file reviewed here; presumably it tracks the next CPU to run in the
   TCG round-robin loop -- confirm against tcg_exec_all().  */
static CPUArchState *next_cpu;
64

    
65
/*
 * Tell whether the thread driving @env currently has nothing to do.
 *
 * A pending stop request or queued work item always makes the CPU
 * "not idle".  Otherwise a stopped CPU, or any CPU while the VM is not
 * running, is idle.  Failing that, the CPU is idle only when it is halted,
 * has no work, and KVM async interrupts are not enabled.
 */
static bool cpu_thread_is_idle(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);
    bool busy;

    /* Requests that the thread must service take priority. */
    if (vcpu->stop || vcpu->queued_work_first) {
        return false;
    }
    if (vcpu->stopped || !runstate_is_running()) {
        return true;
    }

    busy = !vcpu->halted || qemu_cpu_has_work(vcpu) ||
           kvm_async_interrupts_enabled();
    return !busy;
}
81

    
82
static bool all_cpu_threads_idle(void)
83
{
84
    CPUArchState *env;
85

    
86
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
87
        if (!cpu_thread_is_idle(env)) {
88
            return false;
89
        }
90
    }
91
    return true;
92
}
93

    
94
/***********************************************************/
95
/* guest cycle counter */
96

    
97
/* Conversion factor from emulated instructions to virtual clock ticks.  */
98
static int icount_time_shift;
99
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
100
#define MAX_ICOUNT_SHIFT 10
101
/* Compensate for varying guest execution speed.  */
102
static int64_t qemu_icount_bias;
103
static QEMUTimer *icount_rt_timer;
104
static QEMUTimer *icount_vm_timer;
105
static QEMUTimer *icount_warp_timer;
106
static int64_t vm_clock_warp_start;
107
static int64_t qemu_icount;
108

    
109
/* Bookkeeping for the stoppable host tick counter and host clock exposed
   through cpu_get_ticks() and cpu_get_clock().  */
typedef struct TimersState {
    /* Last raw tick value seen by cpu_get_ticks(), used to detect the
       host counter going backwards.  */
    int64_t cpu_ticks_prev;
    /* Offset added to the raw tick counter; holds the frozen tick value
       while ticks are disabled.  */
    int64_t cpu_ticks_offset;
    /* Offset added to get_clock(); holds the frozen clock value while
       ticks are disabled.  */
    int64_t cpu_clock_offset;
    /* Non-zero while the counters are running.  */
    int32_t cpu_ticks_enabled;
    /* Saved by vmstate_timers but never read here; presumably kept for
       migration stream compatibility -- TODO confirm.  */
    int64_t dummy;
} TimersState;

TimersState timers_state;
118

    
119
/* Return the virtual CPU time, based on the instruction counter.  */
120
int64_t cpu_get_icount(void)
121
{
122
    int64_t icount;
123
    CPUArchState *env = cpu_single_env;
124

    
125
    icount = qemu_icount;
126
    if (env) {
127
        if (!can_do_io(env)) {
128
            fprintf(stderr, "Bad clock read\n");
129
        }
130
        icount -= (env->icount_decr.u16.low + env->icount_extra);
131
    }
132
    return qemu_icount_bias + (icount << icount_time_shift);
133
}
134

    
135
/* return the host CPU cycle counter and handle stop/restart */
136
int64_t cpu_get_ticks(void)
137
{
138
    if (use_icount) {
139
        return cpu_get_icount();
140
    }
141
    if (!timers_state.cpu_ticks_enabled) {
142
        return timers_state.cpu_ticks_offset;
143
    } else {
144
        int64_t ticks;
145
        ticks = cpu_get_real_ticks();
146
        if (timers_state.cpu_ticks_prev > ticks) {
147
            /* Note: non increasing ticks may happen if the host uses
148
               software suspend */
149
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
150
        }
151
        timers_state.cpu_ticks_prev = ticks;
152
        return ticks + timers_state.cpu_ticks_offset;
153
    }
154
}
155

    
156
/* return the host CPU monotonic timer and handle stop/restart */
157
int64_t cpu_get_clock(void)
158
{
159
    int64_t ti;
160
    if (!timers_state.cpu_ticks_enabled) {
161
        return timers_state.cpu_clock_offset;
162
    } else {
163
        ti = get_clock();
164
        return ti + timers_state.cpu_clock_offset;
165
    }
166
}
167

    
168
/* enable cpu_get_ticks() */
169
void cpu_enable_ticks(void)
170
{
171
    if (!timers_state.cpu_ticks_enabled) {
172
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
173
        timers_state.cpu_clock_offset -= get_clock();
174
        timers_state.cpu_ticks_enabled = 1;
175
    }
176
}
177

    
178
/* disable cpu_get_ticks() : the clock is stopped. You must not call
179
   cpu_get_ticks() after that.  */
180
void cpu_disable_ticks(void)
181
{
182
    if (timers_state.cpu_ticks_enabled) {
183
        timers_state.cpu_ticks_offset = cpu_get_ticks();
184
        timers_state.cpu_clock_offset = cpu_get_clock();
185
        timers_state.cpu_ticks_enabled = 0;
186
    }
187
}
188

    
189
/* Tolerance used by icount_adjust(): real and virtual time only correlate
   approximately, so small variations are ignored.  When the guest is idle
   the two are aligned in the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
194

    
195
static void icount_adjust(void)
196
{
197
    int64_t cur_time;
198
    int64_t cur_icount;
199
    int64_t delta;
200
    static int64_t last_delta;
201
    /* If the VM is not running, then do nothing.  */
202
    if (!runstate_is_running()) {
203
        return;
204
    }
205
    cur_time = cpu_get_clock();
206
    cur_icount = qemu_get_clock_ns(vm_clock);
207
    delta = cur_icount - cur_time;
208
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
209
    if (delta > 0
210
        && last_delta + ICOUNT_WOBBLE < delta * 2
211
        && icount_time_shift > 0) {
212
        /* The guest is getting too far ahead.  Slow time down.  */
213
        icount_time_shift--;
214
    }
215
    if (delta < 0
216
        && last_delta - ICOUNT_WOBBLE > delta * 2
217
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
218
        /* The guest is getting too far behind.  Speed time up.  */
219
        icount_time_shift++;
220
    }
221
    last_delta = delta;
222
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
223
}
224

    
225
static void icount_adjust_rt(void *opaque)
226
{
227
    qemu_mod_timer(icount_rt_timer,
228
                   qemu_get_clock_ms(rt_clock) + 1000);
229
    icount_adjust();
230
}
231

    
232
static void icount_adjust_vm(void *opaque)
233
{
234
    qemu_mod_timer(icount_vm_timer,
235
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
236
    icount_adjust();
237
}
238

    
239
static int64_t qemu_icount_round(int64_t count)
240
{
241
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
242
}
243

    
244
static void icount_warp_rt(void *opaque)
245
{
246
    if (vm_clock_warp_start == -1) {
247
        return;
248
    }
249

    
250
    if (runstate_is_running()) {
251
        int64_t clock = qemu_get_clock_ns(rt_clock);
252
        int64_t warp_delta = clock - vm_clock_warp_start;
253
        if (use_icount == 1) {
254
            qemu_icount_bias += warp_delta;
255
        } else {
256
            /*
257
             * In adaptive mode, do not let the vm_clock run too
258
             * far ahead of real time.
259
             */
260
            int64_t cur_time = cpu_get_clock();
261
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
262
            int64_t delta = cur_time - cur_icount;
263
            qemu_icount_bias += MIN(warp_delta, delta);
264
        }
265
        if (qemu_clock_expired(vm_clock)) {
266
            qemu_notify_event();
267
        }
268
    }
269
    vm_clock_warp_start = -1;
270
}
271

    
272
void qtest_clock_warp(int64_t dest)
273
{
274
    int64_t clock = qemu_get_clock_ns(vm_clock);
275
    assert(qtest_enabled());
276
    while (clock < dest) {
277
        int64_t deadline = qemu_clock_deadline(vm_clock);
278
        int64_t warp = MIN(dest - clock, deadline);
279
        qemu_icount_bias += warp;
280
        qemu_run_timers(vm_clock);
281
        clock = qemu_get_clock_ns(vm_clock);
282
    }
283
    qemu_notify_event();
284
}
285

    
286
/*
 * Arrange for the virtual clock to keep advancing while every CPU is idle
 * in icount mode.
 *
 * Only meaningful for vm_clock with icount enabled; any other call is a
 * no-op.  Any pending warp is applied first.  If the CPUs are busy or
 * vm_clock has no timers, the warp timer is cancelled.  Otherwise (and
 * unless qtest drives the clock) a realtime timer is armed for the next
 * vm_clock deadline, or the main loop is notified when that deadline has
 * already passed.
 */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * Too much of the warp state is global for this to work on other
     * clocks, but taking a clock argument spares the callers from
     * sprinkling if statements everywhere.
     */
    if (clock != vm_clock) {
        return;
    }
    if (!use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now so
     * the deadline computed below is correct.  This also synchronizes the
     * insn counter before the CPU starts running, in case it is woken by
     * an event other than the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);
    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline <= 0) {
        /* A timer is already due: just wake the main loop. */
        qemu_notify_event();
        return;
    }

    /*
     * Make sure vm_clock proceeds even while the virtual CPU sleeps.
     * Otherwise the CPU might wait for a future timer interrupt that
     * never comes, because a vCPU that executes no insns does not advance
     * vm_clock.
     *
     * An extreme solution would be to never let VCPUs sleep in icount
     * mode while a vm_clock timer is pending and simply jump time to the
     * next vm_clock event.  Instead the VCPUs are stopped and vm_clock is
     * only advanced after some "real" time (related to the time left
     * until the next event) has passed; this rt_clock timer does that.
     * It keeps the warps from being too visible externally -- for
     * example, network packets are not sent continuously instead of
     * every 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
}
341

    
342
static const VMStateDescription vmstate_timers = {
343
    .name = "timer",
344
    .version_id = 2,
345
    .minimum_version_id = 1,
346
    .minimum_version_id_old = 1,
347
    .fields      = (VMStateField[]) {
348
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
349
        VMSTATE_INT64(dummy, TimersState),
350
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
351
        VMSTATE_END_OF_LIST()
352
    }
353
};
354

    
355
void configure_icount(const char *option)
356
{
357
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
358
    if (!option) {
359
        return;
360
    }
361

    
362
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
363
    if (strcmp(option, "auto") != 0) {
364
        icount_time_shift = strtol(option, NULL, 0);
365
        use_icount = 1;
366
        return;
367
    }
368

    
369
    use_icount = 2;
370

    
371
    /* 125MIPS seems a reasonable initial guess at the guest speed.
372
       It will be corrected fairly quickly anyway.  */
373
    icount_time_shift = 3;
374

    
375
    /* Have both realtime and virtual time triggers for speed adjustment.
376
       The realtime trigger catches emulated time passing too slowly,
377
       the virtual time trigger catches emulated time passing too fast.
378
       Realtime triggers occur even when idle, so use them less frequently
379
       than VM triggers.  */
380
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
381
    qemu_mod_timer(icount_rt_timer,
382
                   qemu_get_clock_ms(rt_clock) + 1000);
383
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
384
    qemu_mod_timer(icount_vm_timer,
385
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
386
}
387

    
388
/***********************************************************/
389
void hw_error(const char *fmt, ...)
390
{
391
    va_list ap;
392
    CPUArchState *env;
393
    CPUState *cpu;
394

    
395
    va_start(ap, fmt);
396
    fprintf(stderr, "qemu: hardware error: ");
397
    vfprintf(stderr, fmt, ap);
398
    fprintf(stderr, "\n");
399
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
400
        cpu = ENV_GET_CPU(env);
401
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
402
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
403
    }
404
    va_end(ap);
405
    abort();
406
}
407

    
408
void cpu_synchronize_all_states(void)
409
{
410
    CPUArchState *env;
411

    
412
    for (env = first_cpu; env; env = env->next_cpu) {
413
        cpu_synchronize_state(ENV_GET_CPU(env));
414
    }
415
}
416

    
417
void cpu_synchronize_all_post_reset(void)
418
{
419
    CPUArchState *cpu;
420

    
421
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
422
        cpu_synchronize_post_reset(ENV_GET_CPU(cpu));
423
    }
424
}
425

    
426
void cpu_synchronize_all_post_init(void)
427
{
428
    CPUArchState *cpu;
429

    
430
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
431
        cpu_synchronize_post_init(ENV_GET_CPU(cpu));
432
    }
433
}
434

    
435
/* A CPU counts as stopped whenever the VM as a whole is not running, or
   when its own stopped flag is set.  */
bool cpu_is_stopped(CPUState *cpu)
{
    if (!runstate_is_running()) {
        return true;
    }
    return cpu->stopped;
}
439

    
440
/*
 * Stop a running VM and move it to run state @state.
 *
 * Freezes the tick counters, pauses all VCPUs, switches the run state and
 * notifies the state-change listeners, drains and flushes block devices,
 * then emits the STOP monitor event.  Does nothing when the VM is not
 * running.
 */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
452

    
453
/* A CPU may execute guest code only when it has no pending stop request,
   is not stopped, and the VM is running.  */
static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop || cpu->stopped) {
        return false;
    }
    if (!runstate_is_running()) {
        return false;
    }
    return true;
}
463

    
464
/* Handle a debug exit from guest execution: record @env as the CPU the
   gdbstub stopped on, request a debug stop of the system, and mark this
   CPU as stopped.  */
static void cpu_handle_guest_debug(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    gdb_set_stop_cpu(env);
    qemu_system_debug_request();

    vcpu->stopped = true;
}
472

    
473
static void cpu_signal(int sig)
474
{
475
    if (cpu_single_env) {
476
        cpu_exit(cpu_single_env);
477
    }
478
    exit_request = 1;
479
}
480

    
481
#ifdef CONFIG_LINUX
482
static void sigbus_reraise(void)
483
{
484
    sigset_t set;
485
    struct sigaction action;
486

    
487
    memset(&action, 0, sizeof(action));
488
    action.sa_handler = SIG_DFL;
489
    if (!sigaction(SIGBUS, &action, NULL)) {
490
        raise(SIGBUS);
491
        sigemptyset(&set);
492
        sigaddset(&set, SIGBUS);
493
        sigprocmask(SIG_UNBLOCK, &set, NULL);
494
    }
495
    perror("Failed to re-raise SIGBUS!\n");
496
    abort();
497
}
498

    
499
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
500
                           void *ctx)
501
{
502
    if (kvm_on_sigbus(siginfo->ssi_code,
503
                      (void *)(intptr_t)siginfo->ssi_addr)) {
504
        sigbus_reraise();
505
    }
506
}
507

    
508
static void qemu_init_sigbus(void)
509
{
510
    struct sigaction action;
511

    
512
    memset(&action, 0, sizeof(action));
513
    action.sa_flags = SA_SIGINFO;
514
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
515
    sigaction(SIGBUS, &action, NULL);
516

    
517
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
518
}
519

    
520
/*
 * Consume every pending SIG_IPI and SIGBUS for the calling thread without
 * blocking.
 *
 * Each SIGBUS is forwarded to kvm_on_sigbus_vcpu(); if that returns
 * non-zero the signal is re-raised with default handling.  Unexpected
 * failures of sigtimedwait() or sigpending() are fatal.
 */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t wanted;
    sigset_t pending;
    int rc;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    do {
        /* Zero timeout: poll for one signal, never sleep. */
        rc = sigtimedwait(&wanted, &info, &no_wait);
        if (rc == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (rc == SIGBUS
            && kvm_on_sigbus_vcpu(cpu, info.si_code, info.si_addr)) {
            sigbus_reraise();
        }

        rc = sigpending(&pending);
        if (rc == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&pending, SIG_IPI) || sigismember(&pending, SIGBUS));
}
556

    
557
#else /* !CONFIG_LINUX */
558

    
559
/* Non-Linux build: no SIGBUS setup is performed.  */
static void qemu_init_sigbus(void)
{
    /* Intentionally empty. */
}
562

    
563
/* Non-Linux build: there are no signals to consume.  */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    /* Intentionally empty. */
}
566
#endif /* !CONFIG_LINUX */
567

    
568
#ifndef _WIN32
569
/* Do-nothing handler installed for SIG_IPI by
   qemu_kvm_init_cpu_signals().  @sig is unused.  */
static void dummy_signal(int sig)
{
    /* Intentionally empty. */
}
572

    
573
/*
 * Per-thread signal setup for a KVM VCPU thread.
 *
 * Installs a no-op handler for SIG_IPI, then passes KVM a signal mask
 * equal to the thread's current mask with SIG_IPI and SIGBUS removed.
 * Failure to set the mask is fatal.
 */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    struct sigaction ipi_action;
    sigset_t kvm_mask;
    int rc;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* A NULL new set only queries the current mask. */
    pthread_sigmask(SIG_BLOCK, NULL, &kvm_mask);
    sigdelset(&kvm_mask, SIG_IPI);
    sigdelset(&kvm_mask, SIGBUS);

    rc = kvm_set_signal_mask(env, &kvm_mask);
    if (rc != 0) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-rc));
        exit(1);
    }
}
592

    
593
static void qemu_tcg_init_cpu_signals(void)
594
{
595
    sigset_t set;
596
    struct sigaction sigact;
597

    
598
    memset(&sigact, 0, sizeof(sigact));
599
    sigact.sa_handler = cpu_signal;
600
    sigaction(SIG_IPI, &sigact, NULL);
601

    
602
    sigemptyset(&set);
603
    sigaddset(&set, SIG_IPI);
604
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
605
}
606

    
607
#else /* _WIN32 */
608
/* Win32 build: this variant simply aborts if it is ever called.  */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    abort();
}
612

    
613
/* Win32 build: no signal setup is performed for the TCG thread.  */
static void qemu_tcg_init_cpu_signals(void)
{
    /* Intentionally empty. */
}
616
#endif /* _WIN32 */
617

    
618
static QemuMutex qemu_global_mutex;
619
static QemuCond qemu_io_proceeded_cond;
620
static bool iothread_requesting_mutex;
621

    
622
static QemuThread io_thread;
623

    
624
static QemuThread *tcg_cpu_thread;
625
static QemuCond *tcg_halt_cond;
626

    
627
/* cpu creation */
628
static QemuCond qemu_cpu_cond;
629
/* system init */
630
static QemuCond qemu_pause_cond;
631
static QemuCond qemu_work_cond;
632

    
633
void qemu_init_cpu_loop(void)
634
{
635
    qemu_init_sigbus();
636
    qemu_cond_init(&qemu_cpu_cond);
637
    qemu_cond_init(&qemu_pause_cond);
638
    qemu_cond_init(&qemu_work_cond);
639
    qemu_cond_init(&qemu_io_proceeded_cond);
640
    qemu_mutex_init(&qemu_global_mutex);
641

    
642
    qemu_thread_get_self(&io_thread);
643
}
644

    
645
/*
 * Run @func(@data) in the context of @cpu's thread and wait for it to
 * finish.
 *
 * When called from that CPU's own thread the function is invoked
 * directly.  Otherwise a work item living on the caller's stack is
 * appended to the CPU's queue, the CPU is kicked, and the caller waits on
 * qemu_work_cond (releasing qemu_global_mutex while waiting) until the
 * item is marked done.
 */
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item item;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    item.func = func;
    item.data = data;
    item.next = NULL;
    item.done = false;

    /* Append to the tail of the per-CPU work queue. */
    if (cpu->queued_work_first != NULL) {
        cpu->queued_work_last->next = &item;
    } else {
        cpu->queued_work_first = &item;
    }
    cpu->queued_work_last = &item;

    qemu_cpu_kick(cpu);
    while (!item.done) {
        /* Restore cpu_single_env after the wait, since other threads run
           while the mutex is released.  */
        CPUArchState *saved_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
673

    
674
/* Run and dequeue every work item queued on @cpu by run_on_cpu(), mark
   each one done, and wake all waiters on qemu_work_cond.  Returns
   immediately, without a broadcast, when the queue is empty.  */
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *item;

    if (!cpu->queued_work_first) {
        return;
    }

    for (item = cpu->queued_work_first; item != NULL;
         item = cpu->queued_work_first) {
        /* Unlink before running so the queue head is always valid. */
        cpu->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
690

    
691
/* Housekeeping shared by all VCPU thread types after waking up: honour a
   pending stop request (signalling qemu_pause_cond), run queued work,
   and clear the "kicked" marker.  */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        /* Turn the stop request into the stopped state. */
        cpu->stopped = true;
        cpu->stop = false;
        qemu_cond_signal(&qemu_pause_cond);
    }

    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
701

    
702
static void qemu_tcg_wait_io_event(void)
703
{
704
    CPUArchState *env;
705

    
706
    while (all_cpu_threads_idle()) {
707
       /* Start accounting real time to the virtual clock if the CPUs
708
          are idle.  */
709
        qemu_clock_warp(vm_clock);
710
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
711
    }
712

    
713
    while (iothread_requesting_mutex) {
714
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
715
    }
716

    
717
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
718
        qemu_wait_io_event_common(ENV_GET_CPU(env));
719
    }
720
}
721

    
722
/* KVM VCPU thread: sleep on the CPU's halt condition while it is idle,
   then consume pending signals and run the common wake-up
   housekeeping.  */
static void qemu_kvm_wait_io_event(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    while (cpu_thread_is_idle(env)) {
        qemu_cond_wait(vcpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(vcpu);
    qemu_wait_io_event_common(vcpu);
}
733

    
734
static void *qemu_kvm_cpu_thread_fn(void *arg)
735
{
736
    CPUArchState *env = arg;
737
    CPUState *cpu = ENV_GET_CPU(env);
738
    int r;
739

    
740
    qemu_mutex_lock(&qemu_global_mutex);
741
    qemu_thread_get_self(cpu->thread);
742
    cpu->thread_id = qemu_get_thread_id();
743
    cpu_single_env = env;
744

    
745
    r = kvm_init_vcpu(cpu);
746
    if (r < 0) {
747
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
748
        exit(1);
749
    }
750

    
751
    qemu_kvm_init_cpu_signals(env);
752

    
753
    /* signal CPU creation */
754
    cpu->created = true;
755
    qemu_cond_signal(&qemu_cpu_cond);
756

    
757
    while (1) {
758
        if (cpu_can_run(cpu)) {
759
            r = kvm_cpu_exec(env);
760
            if (r == EXCP_DEBUG) {
761
                cpu_handle_guest_debug(env);
762
            }
763
        }
764
        qemu_kvm_wait_io_event(env);
765
    }
766

    
767
    return NULL;
768
}
769

    
770
static void *qemu_dummy_cpu_thread_fn(void *arg)
771
{
772
#ifdef _WIN32
773
    fprintf(stderr, "qtest is not supported under Windows\n");
774
    exit(1);
775
#else
776
    CPUArchState *env = arg;
777
    CPUState *cpu = ENV_GET_CPU(env);
778
    sigset_t waitset;
779
    int r;
780

    
781
    qemu_mutex_lock_iothread();
782
    qemu_thread_get_self(cpu->thread);
783
    cpu->thread_id = qemu_get_thread_id();
784

    
785
    sigemptyset(&waitset);
786
    sigaddset(&waitset, SIG_IPI);
787

    
788
    /* signal CPU creation */
789
    cpu->created = true;
790
    qemu_cond_signal(&qemu_cpu_cond);
791

    
792
    cpu_single_env = env;
793
    while (1) {
794
        cpu_single_env = NULL;
795
        qemu_mutex_unlock_iothread();
796
        do {
797
            int sig;
798
            r = sigwait(&waitset, &sig);
799
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
800
        if (r == -1) {
801
            perror("sigwait");
802
            exit(1);
803
        }
804
        qemu_mutex_lock_iothread();
805
        cpu_single_env = env;
806
        qemu_wait_io_event_common(cpu);
807
    }
808

    
809
    return NULL;
810
#endif
811
}
812

    
813
static void tcg_exec_all(void);
814

    
815
/* qemu_for_each_cpu() callback used by the TCG thread: record the calling
   thread's id on @cpu and mark it created.  @data is unused.  */
static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
{
    cpu->created = true;
    cpu->thread_id = qemu_get_thread_id();
}
820

    
821
static void *qemu_tcg_cpu_thread_fn(void *arg)
822
{
823
    CPUState *cpu = arg;
824
    CPUArchState *env;
825

    
826
    qemu_tcg_init_cpu_signals();
827
    qemu_thread_get_self(cpu->thread);
828

    
829
    qemu_mutex_lock(&qemu_global_mutex);
830
    qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
831
    qemu_cond_signal(&qemu_cpu_cond);
832

    
833
    /* wait for initial kick-off after machine start */
834
    while (ENV_GET_CPU(first_cpu)->stopped) {
835
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
836

    
837
        /* process any pending work */
838
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
839
            qemu_wait_io_event_common(ENV_GET_CPU(env));
840
        }
841
    }
842

    
843
    while (1) {
844
        tcg_exec_all();
845
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
846
            qemu_notify_event();
847
        }
848
        qemu_tcg_wait_io_event();
849
    }
850

    
851
    return NULL;
852
}
853

    
854
/*
 * Interrupt the host thread that runs @cpu.
 *
 * POSIX: send SIG_IPI to the thread.  Win32: unless called from that
 * thread itself, suspend it, invoke cpu_signal() on its behalf and resume
 * it.  Any failure is fatal.
 *
 * The pthread_kill() failure message now ends with a newline, like the
 * Win32 messages below; previously the text ran into whatever was
 * printed next.
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         *
         * NOTE(review): this loop repeats while GetThreadContext() returns
         * non-zero, and the Win32 API documents non-zero as success, so the
         * condition looks inverted relative to the comment above.  Left
         * unchanged pending confirmation of the handle's access rights and
         * the intended behaviour.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
892

    
893
/* Wake @cpu: broadcast its halt condition and, when TCG is not in use,
   interrupt its thread unless a kick is already outstanding.  */
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);

    if (tcg_enabled() || cpu->thread_kicked) {
        return;
    }
    qemu_cpu_kick_thread(cpu);
    cpu->thread_kicked = true;
}
901

    
902
void qemu_cpu_kick_self(void)
903
{
904
#ifndef _WIN32
905
    assert(cpu_single_env);
906
    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
907

    
908
    if (!cpu_single_cpu->thread_kicked) {
909
        qemu_cpu_kick_thread(cpu_single_cpu);
910
        cpu_single_cpu->thread_kicked = true;
911
    }
912
#else
913
    abort();
914
#endif
915
}
916

    
917
/* Return true when the calling thread is the thread recorded for @cpu.  */
bool qemu_cpu_is_self(CPUState *cpu)
{
    QemuThread *owner = cpu->thread;

    return qemu_thread_is_self(owner);
}
921

    
922
static bool qemu_in_vcpu_thread(void)
923
{
924
    return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
925
}
926

    
927
void qemu_mutex_lock_iothread(void)
928
{
929
    if (!tcg_enabled()) {
930
        qemu_mutex_lock(&qemu_global_mutex);
931
    } else {
932
        iothread_requesting_mutex = true;
933
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
934
            qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
935
            qemu_mutex_lock(&qemu_global_mutex);
936
        }
937
        iothread_requesting_mutex = false;
938
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
939
    }
940
}
941

    
942
void qemu_mutex_unlock_iothread(void)
943
{
944
    qemu_mutex_unlock(&qemu_global_mutex);
945
}
946

    
947
static int all_vcpus_paused(void)
948
{
949
    CPUArchState *penv = first_cpu;
950

    
951
    while (penv) {
952
        CPUState *pcpu = ENV_GET_CPU(penv);
953
        if (!pcpu->stopped) {
954
            return 0;
955
        }
956
        penv = penv->next_cpu;
957
    }
958

    
959
    return 1;
960
}
961

    
962
void pause_all_vcpus(void)
963
{
964
    CPUArchState *penv = first_cpu;
965

    
966
    qemu_clock_enable(vm_clock, false);
967
    while (penv) {
968
        CPUState *pcpu = ENV_GET_CPU(penv);
969
        pcpu->stop = true;
970
        qemu_cpu_kick(pcpu);
971
        penv = penv->next_cpu;
972
    }
973

    
974
    if (qemu_in_vcpu_thread()) {
975
        cpu_stop_current();
976
        if (!kvm_enabled()) {
977
            penv = first_cpu;
978
            while (penv) {
979
                CPUState *pcpu = ENV_GET_CPU(penv);
980
                pcpu->stop = false;
981
                pcpu->stopped = true;
982
                penv = penv->next_cpu;
983
            }
984
            return;
985
        }
986
    }
987

    
988
    while (!all_vcpus_paused()) {
989
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
990
        penv = first_cpu;
991
        while (penv) {
992
            qemu_cpu_kick(ENV_GET_CPU(penv));
993
            penv = penv->next_cpu;
994
        }
995
    }
996
}
997

    
998
/* Let @cpu run again: clear both its stopped state and any pending stop
   request, then kick it.  */
void cpu_resume(CPUState *cpu)
{
    cpu->stopped = false;
    cpu->stop = false;

    qemu_cpu_kick(cpu);
}
1004

    
1005
void resume_all_vcpus(void)
1006
{
1007
    CPUArchState *penv = first_cpu;
1008

    
1009
    qemu_clock_enable(vm_clock, true);
1010
    while (penv) {
1011
        CPUState *pcpu = ENV_GET_CPU(penv);
1012
        cpu_resume(pcpu);
1013
        penv = penv->next_cpu;
1014
    }
1015
}
1016

    
1017
/*
 * Attach @cpu to the TCG execution thread.
 *
 * All CPUs share a single thread under TCG: the first call creates the
 * thread and its halt condition and waits until the thread has marked the
 * CPU as created; later calls just reuse both.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    if (tcg_cpu_thread) {
        /* The shared thread already exists. */
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
        return;
    }

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    tcg_halt_cond = cpu->halt_cond;
    qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
    tcg_cpu_thread = cpu->thread;
}
1039

    
1040
/*
 * Allocate the thread and halt condition for the CPU behind @env, start
 * qemu_kvm_cpu_thread_fn in a joinable thread with @env as its argument,
 * and wait on qemu_cpu_cond until the CPU's 'created' flag is set.
 */
static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    QemuThread *thread = g_malloc0(sizeof(QemuThread));
    QemuCond *halt_cond = g_malloc0(sizeof(QemuCond));

    cpu->thread = thread;
    cpu->halt_cond = halt_cond;
    qemu_cond_init(halt_cond);

    qemu_thread_create(thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1053

    
1054
/*
 * Allocate the thread and halt condition for the CPU behind @env, start
 * qemu_dummy_cpu_thread_fn in a joinable thread with @env as its argument,
 * and wait on qemu_cpu_cond until the CPU's 'created' flag is set.
 */
static void qemu_dummy_start_vcpu(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    QemuThread *thread = g_malloc0(sizeof(QemuThread));
    QemuCond *halt_cond = g_malloc0(sizeof(QemuCond));

    cpu->thread = thread;
    cpu->halt_cond = halt_cond;
    qemu_cond_init(halt_cond);

    qemu_thread_create(thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1067

    
1068
void qemu_init_vcpu(void *_env)
1069
{
1070
    CPUArchState *env = _env;
1071
    CPUState *cpu = ENV_GET_CPU(env);
1072

    
1073
    cpu->nr_cores = smp_cores;
1074
    cpu->nr_threads = smp_threads;
1075
    cpu->stopped = true;
1076
    if (kvm_enabled()) {
1077
        qemu_kvm_start_vcpu(env);
1078
    } else if (tcg_enabled()) {
1079
        qemu_tcg_init_vcpu(cpu);
1080
    } else {
1081
        qemu_dummy_start_vcpu(env);
1082
    }
1083
}
1084

    
1085
void cpu_stop_current(void)
1086
{
1087
    if (cpu_single_env) {
1088
        CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
1089
        cpu_single_cpu->stop = false;
1090
        cpu_single_cpu->stopped = true;
1091
        cpu_exit(cpu_single_env);
1092
        qemu_cond_signal(&qemu_pause_cond);
1093
    }
1094
}
1095

    
1096
/*
 * Stop the VM and move it to run state @state.
 *
 * Outside a vCPU thread the stop is carried out directly by do_vm_stop().
 * Inside a vCPU thread the stop is only requested through
 * qemu_system_vmstop_request() and the current CPU is stopped.
 */
void vm_stop(RunState state)
{
    if (!qemu_in_vcpu_thread()) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
1109

    
1110
/* does a state transition even if the VM is already stopped,
1111
   current state is forgotten forever */
1112
void vm_stop_force_state(RunState state)
1113
{
1114
    if (runstate_is_running()) {
1115
        vm_stop(state);
1116
    } else {
1117
        runstate_set(state);
1118
    }
1119
}
1120

    
1121
/*
 * Run @env through cpu_exec() once and return cpu_exec()'s result.
 *
 * When use_icount is set, the instruction budget is set up before the call
 * and whatever is left of it is handed back afterwards. With
 * CONFIG_PROFILER the time spent in cpu_exec() is added to qemu_time.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti = profile_getclock();
#endif

    if (use_icount) {
        int64_t count;
        int decr;

        /* Take back any budget still recorded in this CPU. */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        /* New budget, derived from the vm_clock deadline. */
        count = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += count;

        /* At most 0xffff goes into the 16-bit decrementer; the rest is
           kept in icount_extra.  */
        if (count > 0xffff) {
            decr = 0xffff;
        } else {
            decr = count;
        }
        env->icount_decr.u16.low = decr;
        env->icount_extra = count - decr;
    }

    ret = cpu_exec(env);

#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif

    if (use_icount) {
        /* Fold the instructions that were not executed back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }

    return ret;
}
1158

    
1159
static void tcg_exec_all(void)
1160
{
1161
    int r;
1162

    
1163
    /* Account partial waits to the vm_clock.  */
1164
    qemu_clock_warp(vm_clock);
1165

    
1166
    if (next_cpu == NULL) {
1167
        next_cpu = first_cpu;
1168
    }
1169
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1170
        CPUArchState *env = next_cpu;
1171
        CPUState *cpu = ENV_GET_CPU(env);
1172

    
1173
        qemu_clock_enable(vm_clock,
1174
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1175

    
1176
        if (cpu_can_run(cpu)) {
1177
            r = tcg_cpu_exec(env);
1178
            if (r == EXCP_DEBUG) {
1179
                cpu_handle_guest_debug(env);
1180
                break;
1181
            }
1182
        } else if (cpu->stop || cpu->stopped) {
1183
            break;
1184
        }
1185
    }
1186
    exit_request = 0;
1187
}
1188

    
1189
void set_numa_modes(void)
1190
{
1191
    CPUArchState *env;
1192
    CPUState *cpu;
1193
    int i;
1194

    
1195
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1196
        cpu = ENV_GET_CPU(env);
1197
        for (i = 0; i < nb_numa_nodes; i++) {
1198
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1199
                cpu->numa_node = i;
1200
            }
1201
        }
1202
    }
1203
}
1204

    
1205
/*
 * Print the list of CPU models to @f through @cpu_fprintf by delegating to
 * the target's cpu_list macro. When the target defines no cpu_list this is
 * a no-op. @optarg is not used.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list
    cpu_list(f, cpu_fprintf);
#endif
}
1212

    
1213
/*
 * Build the list returned for the QMP CPU query: one entry per CPU on the
 * first_cpu list, in list order.
 *
 * Each entry carries the CPU index, whether it is the first CPU in the
 * list ('current'), its halted flag, its thread id and, depending on the
 * target, the program counter field(s). @errp is not used here.
 *
 * Returns the head of a newly allocated list, or NULL when there are no
 * CPUs.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;   /* slot the next entry is linked into */
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        CPUState *cpu = ENV_GET_CPU(env);
        CpuInfoList *info;

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (env == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        /* info->next is already NULL thanks to g_malloc0(). */
        *tail = info;
        tail = &info->next;

        env = env->next_cpu;
    }

    return head;
}
1257

    
1258
/*
 * QMP handler: dump @size bytes of guest memory, starting at address @addr
 * as seen through the debug accessor of one CPU, into the file @filename.
 *
 * @addr:      start address passed to cpu_memory_rw_debug()
 * @size:      number of bytes to dump; must not be negative
 * @filename:  file to create/truncate and write to (opened with "wb")
 * @has_cpu:   whether @cpu_index was supplied; CPU 0 is used otherwise
 * @cpu_index: index of the CPU whose view of memory is dumped
 * @errp:      receives an error when the CPU index or size is invalid, the
 *             file cannot be opened, guest memory cannot be read, or
 *             writing fails
 *
 * The data is copied in chunks of at most sizeof(buf) bytes.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }
    env = cpu->env_ptr;

    /*
     * A negative size would be truncated into the 32-bit chunk length
     * below, making the read overrun buf. Reject it before touching the
     * file system.
     */
    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        /*
         * Do not write out buf when the read failed: it would contain
         * stale or uninitialized stack data rather than guest memory.
         */
        if (cpu_memory_rw_debug(env, addr, buf, l, 0) != 0) {
            error_set(errp, QERR_INVALID_PARAMETER_VALUE, "addr",
                      "a readable guest address range");
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
1301

    
1302
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1303
                  Error **errp)
1304
{
1305
    FILE *f;
1306
    uint32_t l;
1307
    uint8_t buf[1024];
1308

    
1309
    f = fopen(filename, "wb");
1310
    if (!f) {
1311
        error_setg_file_open(errp, errno, filename);
1312
        return;
1313
    }
1314

    
1315
    while (size != 0) {
1316
        l = sizeof(buf);
1317
        if (l > size)
1318
            l = size;
1319
        cpu_physical_memory_rw(addr, buf, l, 0);
1320
        if (fwrite(buf, 1, l, f) != l) {
1321
            error_set(errp, QERR_IO_ERROR);
1322
            goto exit;
1323
        }
1324
        addr += l;
1325
        size -= l;
1326
    }
1327

    
1328
exit:
1329
    fclose(f);
1330
}
1331

    
1332
/*
 * QMP handler: inject an NMI into every CPU on the first_cpu list.
 *
 * On TARGET_I386 a CPU that has an apic_state gets the NMI through
 * apic_deliver_nmi(); a CPU without one gets CPU_INTERRUPT_NMI raised
 * directly. On every other target @errp is set to QERR_UNSUPPORTED.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        if (env->apic_state) {
            apic_deliver_nmi(env->apic_state);
        } else {
            cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
        }
        env = env->next_cpu;
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}