Statistics
| Branch: | Revision:

root / cpus.c @ 3f24a58f

History | View | Annotate | Download (33.7 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor/monitor.h"
29
#include "sysemu/sysemu.h"
30
#include "exec/gdbstub.h"
31
#include "sysemu/dma.h"
32
#include "sysemu/kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu/thread.h"
36
#include "sysemu/cpus.h"
37
#include "sysemu/qtest.h"
38
#include "qemu/main-loop.h"
39
#include "qemu/bitmap.h"
40

    
41
#ifndef _WIN32
42
#include "qemu/compatfd.h"
43
#endif
44

    
45
#ifdef CONFIG_LINUX
46

    
47
#include <sys/prctl.h>
48

    
49
#ifndef PR_MCE_KILL
50
#define PR_MCE_KILL 33
51
#endif
52

    
53
#ifndef PR_MCE_KILL_SET
54
#define PR_MCE_KILL_SET 1
55
#endif
56

    
57
#ifndef PR_MCE_KILL_EARLY
58
#define PR_MCE_KILL_EARLY 1
59
#endif
60

    
61
#endif /* CONFIG_LINUX */
62

    
63
static CPUArchState *next_cpu;
64

    
65
/*
 * Report whether the vCPU behind @env currently has nothing to do.
 *
 * A pending stop request or queued work item always makes the vCPU
 * non-idle.  Otherwise a stopped vCPU (or a VM that is not running) is
 * idle, and a running vCPU is idle only when it is halted, has no work
 * and KVM async interrupts are not enabled.
 */
static bool cpu_thread_is_idle(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);
    bool request_pending = vcpu->stop || vcpu->queued_work_first;

    if (request_pending) {
        return false;
    }
    if (vcpu->stopped || !runstate_is_running()) {
        return true;
    }
    return vcpu->halted && !qemu_cpu_has_work(vcpu) &&
           !kvm_async_interrupts_enabled();
}
81

    
82
static bool all_cpu_threads_idle(void)
83
{
84
    CPUArchState *env;
85

    
86
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
87
        if (!cpu_thread_is_idle(env)) {
88
            return false;
89
        }
90
    }
91
    return true;
92
}
93

    
94
/***********************************************************/
95
/* guest cycle counter */
96

    
97
/* Conversion factor from emulated instructions to virtual clock ticks.  */
98
static int icount_time_shift;
99
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
100
#define MAX_ICOUNT_SHIFT 10
101
/* Compensate for varying guest execution speed.  */
102
static int64_t qemu_icount_bias;
103
static QEMUTimer *icount_rt_timer;
104
static QEMUTimer *icount_vm_timer;
105
static QEMUTimer *icount_warp_timer;
106
static int64_t vm_clock_warp_start;
107
static int64_t qemu_icount;
108

    
109
/*
 * Book-keeping for the host tick counter and host clock as seen by the
 * guest.  cpu_ticks_offset, dummy and cpu_clock_offset are the fields
 * listed in vmstate_timers.
 */
typedef struct TimersState {
    int64_t cpu_ticks_prev;     /* last raw value seen by cpu_get_ticks() */
    int64_t cpu_ticks_offset;   /* added to raw ticks; frozen value when disabled */
    int64_t cpu_clock_offset;   /* added to get_clock(); frozen value when disabled */
    int32_t cpu_ticks_enabled;  /* non-zero while the counters are running */
    int64_t dummy;              /* not used by the code in this file beyond vmstate */
} TimersState;
116

    
117
TimersState timers_state;
118

    
119
/* Return the virtual CPU time, based on the instruction counter.  */
120
int64_t cpu_get_icount(void)
121
{
122
    int64_t icount;
123
    CPUArchState *env = cpu_single_env;
124

    
125
    icount = qemu_icount;
126
    if (env) {
127
        if (!can_do_io(env)) {
128
            fprintf(stderr, "Bad clock read\n");
129
        }
130
        icount -= (env->icount_decr.u16.low + env->icount_extra);
131
    }
132
    return qemu_icount_bias + (icount << icount_time_shift);
133
}
134

    
135
/* return the host CPU cycle counter and handle stop/restart */
136
int64_t cpu_get_ticks(void)
137
{
138
    if (use_icount) {
139
        return cpu_get_icount();
140
    }
141
    if (!timers_state.cpu_ticks_enabled) {
142
        return timers_state.cpu_ticks_offset;
143
    } else {
144
        int64_t ticks;
145
        ticks = cpu_get_real_ticks();
146
        if (timers_state.cpu_ticks_prev > ticks) {
147
            /* Note: non increasing ticks may happen if the host uses
148
               software suspend */
149
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
150
        }
151
        timers_state.cpu_ticks_prev = ticks;
152
        return ticks + timers_state.cpu_ticks_offset;
153
    }
154
}
155

    
156
/* return the host CPU monotonic timer and handle stop/restart */
157
int64_t cpu_get_clock(void)
158
{
159
    int64_t ti;
160
    if (!timers_state.cpu_ticks_enabled) {
161
        return timers_state.cpu_clock_offset;
162
    } else {
163
        ti = get_clock();
164
        return ti + timers_state.cpu_clock_offset;
165
    }
166
}
167

    
168
/* enable cpu_get_ticks() */
169
void cpu_enable_ticks(void)
170
{
171
    if (!timers_state.cpu_ticks_enabled) {
172
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
173
        timers_state.cpu_clock_offset -= get_clock();
174
        timers_state.cpu_ticks_enabled = 1;
175
    }
176
}
177

    
178
/* disable cpu_get_ticks() : the clock is stopped. You must not call
179
   cpu_get_ticks() after that.  */
180
void cpu_disable_ticks(void)
181
{
182
    if (timers_state.cpu_ticks_enabled) {
183
        timers_state.cpu_ticks_offset = cpu_get_ticks();
184
        timers_state.cpu_clock_offset = cpu_get_clock();
185
        timers_state.cpu_ticks_enabled = 0;
186
    }
187
}
188

    
189
/* Correlation between real and virtual time is always going to be
190
   fairly approximate, so ignore small variation.
191
   When the guest is idle real and virtual time will be aligned in
192
   the IO wait loop.  */
193
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
194

    
195
static void icount_adjust(void)
196
{
197
    int64_t cur_time;
198
    int64_t cur_icount;
199
    int64_t delta;
200
    static int64_t last_delta;
201
    /* If the VM is not running, then do nothing.  */
202
    if (!runstate_is_running()) {
203
        return;
204
    }
205
    cur_time = cpu_get_clock();
206
    cur_icount = qemu_get_clock_ns(vm_clock);
207
    delta = cur_icount - cur_time;
208
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
209
    if (delta > 0
210
        && last_delta + ICOUNT_WOBBLE < delta * 2
211
        && icount_time_shift > 0) {
212
        /* The guest is getting too far ahead.  Slow time down.  */
213
        icount_time_shift--;
214
    }
215
    if (delta < 0
216
        && last_delta - ICOUNT_WOBBLE > delta * 2
217
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
218
        /* The guest is getting too far behind.  Speed time up.  */
219
        icount_time_shift++;
220
    }
221
    last_delta = delta;
222
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
223
}
224

    
225
static void icount_adjust_rt(void *opaque)
226
{
227
    qemu_mod_timer(icount_rt_timer,
228
                   qemu_get_clock_ms(rt_clock) + 1000);
229
    icount_adjust();
230
}
231

    
232
static void icount_adjust_vm(void *opaque)
233
{
234
    qemu_mod_timer(icount_vm_timer,
235
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
236
    icount_adjust();
237
}
238

    
239
static int64_t qemu_icount_round(int64_t count)
240
{
241
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
242
}
243

    
244
static void icount_warp_rt(void *opaque)
245
{
246
    if (vm_clock_warp_start == -1) {
247
        return;
248
    }
249

    
250
    if (runstate_is_running()) {
251
        int64_t clock = qemu_get_clock_ns(rt_clock);
252
        int64_t warp_delta = clock - vm_clock_warp_start;
253
        if (use_icount == 1) {
254
            qemu_icount_bias += warp_delta;
255
        } else {
256
            /*
257
             * In adaptive mode, do not let the vm_clock run too
258
             * far ahead of real time.
259
             */
260
            int64_t cur_time = cpu_get_clock();
261
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
262
            int64_t delta = cur_time - cur_icount;
263
            qemu_icount_bias += MIN(warp_delta, delta);
264
        }
265
        if (qemu_clock_expired(vm_clock)) {
266
            qemu_notify_event();
267
        }
268
    }
269
    vm_clock_warp_start = -1;
270
}
271

    
272
void qtest_clock_warp(int64_t dest)
273
{
274
    int64_t clock = qemu_get_clock_ns(vm_clock);
275
    assert(qtest_enabled());
276
    while (clock < dest) {
277
        int64_t deadline = qemu_clock_deadline(vm_clock);
278
        int64_t warp = MIN(dest - clock, deadline);
279
        qemu_icount_bias += warp;
280
        qemu_run_timers(vm_clock);
281
        clock = qemu_get_clock_ns(vm_clock);
282
    }
283
    qemu_notify_event();
284
}
285

    
286
/*
 * Start (or cancel) a "warp" of vm_clock while all vCPUs are idle.
 *
 * Only acts for vm_clock in icount mode.  Any warp in progress is first
 * folded into the clock.  If some vCPU is busy or vm_clock has no timers,
 * the warp timer is cancelled.  Under qtest nothing further is done.
 * Otherwise a new warp is started and the rt_clock warp timer is armed
 * for the next vm_clock deadline, or the main loop is notified at once
 * if that deadline is not in the future.
 */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t until_next;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
     * ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);

    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    /* When testing, qtest commands advance icount.  */
    if (qtest_enabled()) {
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    until_next = qemu_clock_deadline(vm_clock);
    if (until_next <= 0) {
        qemu_notify_event();
        return;
    }

    /*
     * Ensure the vm_clock proceeds even when the virtual CPU goes to
     * sleep.  Otherwise, the CPU might be waiting for a future timer
     * interrupt to wake it up, but the interrupt never comes because
     * the vCPU isn't running any insns and thus doesn't advance the
     * vm_clock.
     *
     * An extreme solution for this problem would be to never let VCPUs
     * sleep in icount mode if there is a pending vm_clock timer; rather
     * time could just advance to the next vm_clock event.  Instead, we
     * do stop VCPUs and only advance vm_clock after some "real" time,
     * (related to the time left until the next event) has passed.  This
     * rt_clock timer will do this.  This avoids that the warps are too
     * visible externally---for example, you will not be sending network
     * packets continuously instead of every 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + until_next);
}
341

    
342
static const VMStateDescription vmstate_timers = {
343
    .name = "timer",
344
    .version_id = 2,
345
    .minimum_version_id = 1,
346
    .minimum_version_id_old = 1,
347
    .fields      = (VMStateField[]) {
348
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
349
        VMSTATE_INT64(dummy, TimersState),
350
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
351
        VMSTATE_END_OF_LIST()
352
    }
353
};
354

    
355
void configure_icount(const char *option)
356
{
357
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
358
    if (!option) {
359
        return;
360
    }
361

    
362
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
363
    if (strcmp(option, "auto") != 0) {
364
        icount_time_shift = strtol(option, NULL, 0);
365
        use_icount = 1;
366
        return;
367
    }
368

    
369
    use_icount = 2;
370

    
371
    /* 125MIPS seems a reasonable initial guess at the guest speed.
372
       It will be corrected fairly quickly anyway.  */
373
    icount_time_shift = 3;
374

    
375
    /* Have both realtime and virtual time triggers for speed adjustment.
376
       The realtime trigger catches emulated time passing too slowly,
377
       the virtual time trigger catches emulated time passing too fast.
378
       Realtime triggers occur even when idle, so use them less frequently
379
       than VM triggers.  */
380
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
381
    qemu_mod_timer(icount_rt_timer,
382
                   qemu_get_clock_ms(rt_clock) + 1000);
383
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
384
    qemu_mod_timer(icount_vm_timer,
385
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
386
}
387

    
388
/***********************************************************/
389
void hw_error(const char *fmt, ...)
390
{
391
    va_list ap;
392
    CPUArchState *env;
393
    CPUState *cpu;
394

    
395
    va_start(ap, fmt);
396
    fprintf(stderr, "qemu: hardware error: ");
397
    vfprintf(stderr, fmt, ap);
398
    fprintf(stderr, "\n");
399
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
400
        cpu = ENV_GET_CPU(env);
401
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
402
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
403
    }
404
    va_end(ap);
405
    abort();
406
}
407

    
408
void cpu_synchronize_all_states(void)
409
{
410
    CPUArchState *cpu;
411

    
412
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
413
        cpu_synchronize_state(cpu);
414
    }
415
}
416

    
417
void cpu_synchronize_all_post_reset(void)
418
{
419
    CPUArchState *cpu;
420

    
421
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
422
        cpu_synchronize_post_reset(ENV_GET_CPU(cpu));
423
    }
424
}
425

    
426
void cpu_synchronize_all_post_init(void)
427
{
428
    CPUArchState *cpu;
429

    
430
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
431
        cpu_synchronize_post_init(ENV_GET_CPU(cpu));
432
    }
433
}
434

    
435
/*
 * Return true if @cpu is not executing: either the VM as a whole is not
 * running or this CPU's stopped flag is set.
 */
bool cpu_is_stopped(CPUState *cpu)
{
    if (!runstate_is_running()) {
        return true;
    }
    return cpu->stopped;
}
439

    
440
/*
 * Stop a running VM and move it to @state.
 *
 * In order: freeze the tick counters, pause all vCPUs, record the new
 * run state and notify listeners, drain and flush block devices, and
 * emit the STOP monitor event.  Does nothing if the VM is not running.
 */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
452

    
453
/*
 * Return true if @cpu may execute guest code: no stop request is
 * pending, it is not stopped, and the VM is running.
 */
static bool cpu_can_run(CPUState *cpu)
{
    return !cpu->stop && !cpu->stopped && runstate_is_running();
}
463

    
464
/*
 * React to a debug exception raised by the CPU behind @env: tell the
 * gdbstub which CPU stopped, request a system debug stop, and mark the
 * CPU as stopped.
 */
static void cpu_handle_guest_debug(CPUArchState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();
    ENV_GET_CPU(env)->stopped = true;
}
472

    
473
static void cpu_signal(int sig)
474
{
475
    if (cpu_single_env) {
476
        cpu_exit(cpu_single_env);
477
    }
478
    exit_request = 1;
479
}
480

    
481
#ifdef CONFIG_LINUX
482
static void sigbus_reraise(void)
483
{
484
    sigset_t set;
485
    struct sigaction action;
486

    
487
    memset(&action, 0, sizeof(action));
488
    action.sa_handler = SIG_DFL;
489
    if (!sigaction(SIGBUS, &action, NULL)) {
490
        raise(SIGBUS);
491
        sigemptyset(&set);
492
        sigaddset(&set, SIGBUS);
493
        sigprocmask(SIG_UNBLOCK, &set, NULL);
494
    }
495
    perror("Failed to re-raise SIGBUS!\n");
496
    abort();
497
}
498

    
499
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
500
                           void *ctx)
501
{
502
    if (kvm_on_sigbus(siginfo->ssi_code,
503
                      (void *)(intptr_t)siginfo->ssi_addr)) {
504
        sigbus_reraise();
505
    }
506
}
507

    
508
static void qemu_init_sigbus(void)
509
{
510
    struct sigaction action;
511

    
512
    memset(&action, 0, sizeof(action));
513
    action.sa_flags = SA_SIGINFO;
514
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
515
    sigaction(SIGBUS, &action, NULL);
516

    
517
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
518
}
519

    
520
/*
 * Consume every pending SIG_IPI and SIGBUS for the calling thread without
 * blocking (zero timeout).
 *
 * A consumed SIGBUS is forwarded to kvm_on_sigbus_vcpu() for @cpu and
 * re-raised if that returns non-zero; SIG_IPI is simply discarded.  The
 * loop repeats while either signal is still pending.  Unexpected
 * sigtimedwait()/sigpending() failures terminate the process.
 */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t wanted;
    sigset_t pending;
    int rc;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    for (;;) {
        rc = sigtimedwait(&wanted, &info, &no_wait);
        if (rc == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (rc == SIGBUS) {
            if (kvm_on_sigbus_vcpu(cpu, info.si_code, info.si_addr)) {
                sigbus_reraise();
            }
        }

        rc = sigpending(&pending);
        if (rc == -1) {
            perror("sigpending");
            exit(1);
        }

        if (!sigismember(&pending, SIG_IPI) &&
            !sigismember(&pending, SIGBUS)) {
            break;
        }
    }
}
556

    
557
#else /* !CONFIG_LINUX */
558

    
559
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
562

    
563
/* Non-Linux build: there are no signals to consume; @cpu is unused. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    /* intentionally empty */
}
566
#endif /* !CONFIG_LINUX */
567

    
568
#ifndef _WIN32
569
/*
 * Do-nothing signal handler; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals().  @sig is unused.
 */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
572

    
573
/*
 * Per-vCPU signal setup for KVM.
 *
 * Installs dummy_signal() for SIG_IPI, then takes the thread's current
 * signal mask, removes SIG_IPI and SIGBUS from it and hands the result to
 * kvm_set_signal_mask() for @env.  Exits the process if that call fails.
 */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    struct sigaction ipi_action;
    sigset_t mask;
    int rc;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* A NULL new-set only queries the current mask. */
    pthread_sigmask(SIG_BLOCK, NULL, &mask);
    sigdelset(&mask, SIG_IPI);
    sigdelset(&mask, SIGBUS);

    rc = kvm_set_signal_mask(env, &mask);
    if (rc) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-rc));
        exit(1);
    }
}
592

    
593
static void qemu_tcg_init_cpu_signals(void)
594
{
595
    sigset_t set;
596
    struct sigaction sigact;
597

    
598
    memset(&sigact, 0, sizeof(sigact));
599
    sigact.sa_handler = cpu_signal;
600
    sigaction(SIG_IPI, &sigact, NULL);
601

    
602
    sigemptyset(&set);
603
    sigaddset(&set, SIG_IPI);
604
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
605
}
606

    
607
#else /* _WIN32 */
608
/* Win32 build: this path is not expected to be reached; abort if it is. */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    abort();
}
612

    
613
/* Win32 build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
616
#endif /* _WIN32 */
617

    
618
static QemuMutex qemu_global_mutex;
619
static QemuCond qemu_io_proceeded_cond;
620
static bool iothread_requesting_mutex;
621

    
622
static QemuThread io_thread;
623

    
624
static QemuThread *tcg_cpu_thread;
625
static QemuCond *tcg_halt_cond;
626

    
627
/* cpu creation */
628
static QemuCond qemu_cpu_cond;
629
/* system init */
630
static QemuCond qemu_pause_cond;
631
static QemuCond qemu_work_cond;
632

    
633
void qemu_init_cpu_loop(void)
634
{
635
    qemu_init_sigbus();
636
    qemu_cond_init(&qemu_cpu_cond);
637
    qemu_cond_init(&qemu_pause_cond);
638
    qemu_cond_init(&qemu_work_cond);
639
    qemu_cond_init(&qemu_io_proceeded_cond);
640
    qemu_mutex_init(&qemu_global_mutex);
641

    
642
    qemu_thread_get_self(&io_thread);
643
}
644

    
645
/*
 * Run @func(@data) in the context of @cpu's thread and wait for it.
 *
 * If the caller already is that thread the function is invoked directly.
 * Otherwise a stack-allocated work item is appended to the CPU's work
 * queue, the CPU is kicked, and the caller sleeps on qemu_work_cond
 * (releasing qemu_global_mutex) until the item is marked done.
 * cpu_single_env is saved and restored around each wait.
 */
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item item;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    item.func = func;
    item.data = data;
    item.next = NULL;
    item.done = false;

    /* append to the tail of the per-CPU queue */
    if (cpu->queued_work_first != NULL) {
        cpu->queued_work_last->next = &item;
    } else {
        cpu->queued_work_first = &item;
    }
    cpu->queued_work_last = &item;

    qemu_cpu_kick(cpu);
    while (!item.done) {
        CPUArchState *saved_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
673

    
674
/*
 * Run and complete every work item queued on @cpu, in queue order, then
 * wake all threads waiting on qemu_work_cond.  Returns immediately,
 * without broadcasting, if the queue is empty.
 */
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *item = cpu->queued_work_first;

    if (item == NULL) {
        return;
    }

    do {
        /* unlink before running, so the head always points at pending work */
        cpu->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
        item = cpu->queued_work_first;
    } while (item != NULL);

    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
690

    
691
/*
 * Housekeeping shared by all vCPU thread types after waking up:
 * acknowledge a pending stop request (stop -> stopped, signalling
 * qemu_pause_cond), run queued work, and clear the kicked flag.
 */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    bool stop_requested = cpu->stop;

    if (stop_requested) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }

    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
701

    
702
static void qemu_tcg_wait_io_event(void)
703
{
704
    CPUArchState *env;
705

    
706
    while (all_cpu_threads_idle()) {
707
       /* Start accounting real time to the virtual clock if the CPUs
708
          are idle.  */
709
        qemu_clock_warp(vm_clock);
710
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
711
    }
712

    
713
    while (iothread_requesting_mutex) {
714
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
715
    }
716

    
717
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
718
        qemu_wait_io_event_common(ENV_GET_CPU(env));
719
    }
720
}
721

    
722
/*
 * Wait point of a KVM vCPU thread: sleep on the CPU's halt condition
 * while it is idle, then consume pending signals and run the common
 * wake-up housekeeping.
 */
static void qemu_kvm_wait_io_event(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    for (;;) {
        if (!cpu_thread_is_idle(env)) {
            break;
        }
        qemu_cond_wait(vcpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(vcpu);
    qemu_wait_io_event_common(vcpu);
}
733

    
734
static void *qemu_kvm_cpu_thread_fn(void *arg)
735
{
736
    CPUArchState *env = arg;
737
    CPUState *cpu = ENV_GET_CPU(env);
738
    int r;
739

    
740
    qemu_mutex_lock(&qemu_global_mutex);
741
    qemu_thread_get_self(cpu->thread);
742
    cpu->thread_id = qemu_get_thread_id();
743
    cpu_single_env = env;
744

    
745
    r = kvm_init_vcpu(cpu);
746
    if (r < 0) {
747
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
748
        exit(1);
749
    }
750

    
751
    qemu_kvm_init_cpu_signals(env);
752

    
753
    /* signal CPU creation */
754
    cpu->created = true;
755
    qemu_cond_signal(&qemu_cpu_cond);
756

    
757
    while (1) {
758
        if (cpu_can_run(cpu)) {
759
            r = kvm_cpu_exec(env);
760
            if (r == EXCP_DEBUG) {
761
                cpu_handle_guest_debug(env);
762
            }
763
        }
764
        qemu_kvm_wait_io_event(env);
765
    }
766

    
767
    return NULL;
768
}
769

    
770
static void *qemu_dummy_cpu_thread_fn(void *arg)
771
{
772
#ifdef _WIN32
773
    fprintf(stderr, "qtest is not supported under Windows\n");
774
    exit(1);
775
#else
776
    CPUArchState *env = arg;
777
    CPUState *cpu = ENV_GET_CPU(env);
778
    sigset_t waitset;
779
    int r;
780

    
781
    qemu_mutex_lock_iothread();
782
    qemu_thread_get_self(cpu->thread);
783
    cpu->thread_id = qemu_get_thread_id();
784

    
785
    sigemptyset(&waitset);
786
    sigaddset(&waitset, SIG_IPI);
787

    
788
    /* signal CPU creation */
789
    cpu->created = true;
790
    qemu_cond_signal(&qemu_cpu_cond);
791

    
792
    cpu_single_env = env;
793
    while (1) {
794
        cpu_single_env = NULL;
795
        qemu_mutex_unlock_iothread();
796
        do {
797
            int sig;
798
            r = sigwait(&waitset, &sig);
799
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
800
        if (r == -1) {
801
            perror("sigwait");
802
            exit(1);
803
        }
804
        qemu_mutex_lock_iothread();
805
        cpu_single_env = env;
806
        qemu_wait_io_event_common(cpu);
807
    }
808

    
809
    return NULL;
810
#endif
811
}
812

    
813
static void tcg_exec_all(void);
814

    
815
static void *qemu_tcg_cpu_thread_fn(void *arg)
816
{
817
    CPUState *cpu = arg;
818
    CPUArchState *env;
819

    
820
    qemu_tcg_init_cpu_signals();
821
    qemu_thread_get_self(cpu->thread);
822

    
823
    /* signal CPU creation */
824
    qemu_mutex_lock(&qemu_global_mutex);
825
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
826
        cpu = ENV_GET_CPU(env);
827
        cpu->thread_id = qemu_get_thread_id();
828
        cpu->created = true;
829
    }
830
    qemu_cond_signal(&qemu_cpu_cond);
831

    
832
    /* wait for initial kick-off after machine start */
833
    while (ENV_GET_CPU(first_cpu)->stopped) {
834
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
835

    
836
        /* process any pending work */
837
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
838
            qemu_wait_io_event_common(ENV_GET_CPU(env));
839
        }
840
    }
841

    
842
    while (1) {
843
        tcg_exec_all();
844
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
845
            qemu_notify_event();
846
        }
847
        qemu_tcg_wait_io_event();
848
    }
849

    
850
    return NULL;
851
}
852

    
853
/*
 * Interrupt the thread running @cpu.
 *
 * POSIX: send SIG_IPI to the thread with pthread_kill(); a failure is
 * fatal.
 * Win32: unless called from the CPU's own thread, suspend it, invoke
 * cpu_signal() on its behalf and resume it; Suspend/Resume failures are
 * fatal.
 *
 * Error messages end with a newline, and the DWORD from GetLastError()
 * is printed with a matching conversion.
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    (unsigned long)GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        /* NOTE(review): the loop repeats while GetThreadContext() returns
         * non-zero; confirm against the Win32 documentation that this is
         * the intended polarity. */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    (unsigned long)GetLastError());
            exit(1);
        }
    }
#endif
}
891

    
892
/*
 * Wake up @cpu: broadcast its halt condition and, when TCG is not in
 * use, also kick its thread unless a kick is already outstanding.
 */
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);

    if (tcg_enabled() || cpu->thread_kicked) {
        return;
    }
    qemu_cpu_kick_thread(cpu);
    cpu->thread_kicked = true;
}
900

    
901
void qemu_cpu_kick_self(void)
902
{
903
#ifndef _WIN32
904
    assert(cpu_single_env);
905
    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
906

    
907
    if (!cpu_single_cpu->thread_kicked) {
908
        qemu_cpu_kick_thread(cpu_single_cpu);
909
        cpu_single_cpu->thread_kicked = true;
910
    }
911
#else
912
    abort();
913
#endif
914
}
915

    
916
/* Return true if the calling thread is the thread recorded for @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    bool is_self = qemu_thread_is_self(cpu->thread);

    return is_self;
}
920

    
921
static bool qemu_in_vcpu_thread(void)
922
{
923
    return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
924
}
925

    
926
void qemu_mutex_lock_iothread(void)
927
{
928
    if (!tcg_enabled()) {
929
        qemu_mutex_lock(&qemu_global_mutex);
930
    } else {
931
        iothread_requesting_mutex = true;
932
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
933
            qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
934
            qemu_mutex_lock(&qemu_global_mutex);
935
        }
936
        iothread_requesting_mutex = false;
937
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
938
    }
939
}
940

    
941
void qemu_mutex_unlock_iothread(void)
942
{
943
    qemu_mutex_unlock(&qemu_global_mutex);
944
}
945

    
946
static int all_vcpus_paused(void)
947
{
948
    CPUArchState *penv = first_cpu;
949

    
950
    while (penv) {
951
        CPUState *pcpu = ENV_GET_CPU(penv);
952
        if (!pcpu->stopped) {
953
            return 0;
954
        }
955
        penv = penv->next_cpu;
956
    }
957

    
958
    return 1;
959
}
960

    
961
void pause_all_vcpus(void)
962
{
963
    CPUArchState *penv = first_cpu;
964

    
965
    qemu_clock_enable(vm_clock, false);
966
    while (penv) {
967
        CPUState *pcpu = ENV_GET_CPU(penv);
968
        pcpu->stop = true;
969
        qemu_cpu_kick(pcpu);
970
        penv = penv->next_cpu;
971
    }
972

    
973
    if (qemu_in_vcpu_thread()) {
974
        cpu_stop_current();
975
        if (!kvm_enabled()) {
976
            while (penv) {
977
                CPUState *pcpu = ENV_GET_CPU(penv);
978
                pcpu->stop = 0;
979
                pcpu->stopped = true;
980
                penv = penv->next_cpu;
981
            }
982
            return;
983
        }
984
    }
985

    
986
    while (!all_vcpus_paused()) {
987
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
988
        penv = first_cpu;
989
        while (penv) {
990
            qemu_cpu_kick(ENV_GET_CPU(penv));
991
            penv = penv->next_cpu;
992
        }
993
    }
994
}
995

    
996
void resume_all_vcpus(void)
997
{
998
    CPUArchState *penv = first_cpu;
999

    
1000
    qemu_clock_enable(vm_clock, true);
1001
    while (penv) {
1002
        CPUState *pcpu = ENV_GET_CPU(penv);
1003
        pcpu->stop = false;
1004
        pcpu->stopped = false;
1005
        qemu_cpu_kick(pcpu);
1006
        penv = penv->next_cpu;
1007
    }
1008
}
1009

    
1010
/*
 * Attach @cpu to the TCG execution thread.
 *
 * All vCPUs share a single thread under TCG.  The first call allocates
 * the thread object and halt condition, starts qemu_tcg_cpu_thread_fn()
 * and waits on qemu_cpu_cond until the CPU is marked created; later
 * calls just reuse the shared thread and condition.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    if (tcg_cpu_thread) {
        /* thread already exists: share it */
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
        return;
    }

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    tcg_halt_cond = cpu->halt_cond;
    qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
    tcg_cpu_thread = cpu->thread;
}
1032

    
1033
/*
 * Allocate a thread and halt condition for the CPU behind @env, start
 * qemu_kvm_cpu_thread_fn on that thread with @env as its argument, and
 * wait on qemu_cpu_cond until cpu->created is set.
 */
static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);
    QemuThread *thread = g_malloc0(sizeof(QemuThread));
    QemuCond *halt_cond = g_malloc0(sizeof(QemuCond));

    qemu_cond_init(halt_cond);

    /* Both fields are filled in before the thread starts running. */
    vcpu->thread = thread;
    vcpu->halt_cond = halt_cond;

    qemu_thread_create(thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    while (!vcpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1046

    
1047
/*
 * Allocate a thread and halt condition for the CPU behind @env, start
 * qemu_dummy_cpu_thread_fn on that thread with @env as its argument, and
 * wait on qemu_cpu_cond until cpu->created is set.
 */
static void qemu_dummy_start_vcpu(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);
    QemuThread *thread = g_malloc0(sizeof(QemuThread));
    QemuCond *halt_cond = g_malloc0(sizeof(QemuCond));

    qemu_cond_init(halt_cond);

    /* Both fields are filled in before the thread starts running. */
    vcpu->thread = thread;
    vcpu->halt_cond = halt_cond;

    qemu_thread_create(thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    while (!vcpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1060

    
1061
void qemu_init_vcpu(void *_env)
1062
{
1063
    CPUArchState *env = _env;
1064
    CPUState *cpu = ENV_GET_CPU(env);
1065

    
1066
    cpu->nr_cores = smp_cores;
1067
    cpu->nr_threads = smp_threads;
1068
    cpu->stopped = true;
1069
    if (kvm_enabled()) {
1070
        qemu_kvm_start_vcpu(env);
1071
    } else if (tcg_enabled()) {
1072
        qemu_tcg_init_vcpu(cpu);
1073
    } else {
1074
        qemu_dummy_start_vcpu(env);
1075
    }
1076
}
1077

    
1078
void cpu_stop_current(void)
1079
{
1080
    if (cpu_single_env) {
1081
        CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
1082
        cpu_single_cpu->stop = false;
1083
        cpu_single_cpu->stopped = true;
1084
        cpu_exit(cpu_single_env);
1085
        qemu_cond_signal(&qemu_pause_cond);
1086
    }
1087
}
1088

    
1089
/*
 * Stop the VM and move to run state @state.
 *
 * Outside a vCPU thread the stop is performed directly via do_vm_stop().
 * Inside a vCPU thread only a vmstop request is queued and the current
 * CPU is stopped.
 */
void vm_stop(RunState state)
{
    if (!qemu_in_vcpu_thread()) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
1102

    
1103
/* does a state transition even if the VM is already stopped,
1104
   current state is forgotten forever */
1105
void vm_stop_force_state(RunState state)
1106
{
1107
    if (runstate_is_running()) {
1108
        vm_stop(state);
1109
    } else {
1110
        runstate_set(state);
1111
    }
1112
}
1113

    
1114
/*
 * Run cpu_exec() on @env and return its result.
 *
 * With use_icount set, an instruction budget derived from the vm_clock
 * deadline is loaded into env->icount_decr / env->icount_extra before
 * the run, and whatever is left of it afterwards is subtracted from
 * qemu_icount again.  With CONFIG_PROFILER the elapsed profile clock is
 * added to qemu_time.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti = profile_getclock();
#endif

    if (use_icount) {
        int64_t budget;
        int low;

        /* Drop whatever is still charged to this CPU. */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        budget = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += budget;

        /* The 16-bit counter takes at most 0xffff; the rest is "extra". */
        if (budget > 0xffff) {
            low = 0xffff;
        } else {
            low = budget;
        }
        env->icount_decr.u16.low = low;
        env->icount_extra = budget - low;
    }

    ret = cpu_exec(env);

#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif

    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }

    return ret;
}
1151

    
1152
static void tcg_exec_all(void)
1153
{
1154
    int r;
1155

    
1156
    /* Account partial waits to the vm_clock.  */
1157
    qemu_clock_warp(vm_clock);
1158

    
1159
    if (next_cpu == NULL) {
1160
        next_cpu = first_cpu;
1161
    }
1162
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1163
        CPUArchState *env = next_cpu;
1164
        CPUState *cpu = ENV_GET_CPU(env);
1165

    
1166
        qemu_clock_enable(vm_clock,
1167
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1168

    
1169
        if (cpu_can_run(cpu)) {
1170
            r = tcg_cpu_exec(env);
1171
            if (r == EXCP_DEBUG) {
1172
                cpu_handle_guest_debug(env);
1173
                break;
1174
            }
1175
        } else if (cpu->stop || cpu->stopped) {
1176
            break;
1177
        }
1178
    }
1179
    exit_request = 0;
1180
}
1181

    
1182
void set_numa_modes(void)
1183
{
1184
    CPUArchState *env;
1185
    CPUState *cpu;
1186
    int i;
1187

    
1188
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1189
        cpu = ENV_GET_CPU(env);
1190
        for (i = 0; i < nb_numa_nodes; i++) {
1191
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1192
                cpu->numa_node = i;
1193
            }
1194
        }
1195
    }
1196
}
1197

    
1198
/*
 * Print the target's CPU list to @f through @cpu_fprintf by calling
 * cpu_list(), when the target defines that macro; otherwise do nothing.
 * @optarg is not used.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
#ifdef cpu_list
    cpu_list(f, cpu_fprintf);
#endif
    /* XXX: implement xxx_cpu_list for targets that still miss it */
}
1205

    
1206
/*
 * Build a CpuInfoList with one entry per CPU on the first_cpu list, in
 * list order, and return its head (NULL when there are no CPUs).
 * Each entry carries the CPU index, whether it is first_cpu, its halted
 * flag, its thread id and a target-specific program counter field.
 * @errp is not written by this function.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CPUState *cpu = ENV_GET_CPU(env);
        CpuInfoList *entry;

        cpu_synchronize_state(env);

        entry = g_malloc0(sizeof(*entry));
        entry->value = g_malloc0(sizeof(*entry->value));
        entry->value->CPU = cpu->cpu_index;
        entry->value->current = (env == first_cpu);
        entry->value->halted = cpu->halted;
        entry->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        entry->value->has_pc = true;
        entry->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        entry->value->has_nip = true;
        entry->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        entry->value->has_pc = true;
        entry->value->pc = env->pc;
        entry->value->has_npc = true;
        entry->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        entry->value->has_PC = true;
        entry->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        /* Append: entry->next is already NULL from g_malloc0(). */
        *tail = entry;
        tail = &entry->next;
    }

    return head;
}
1250

    
1251
/*
 * Dump @size bytes of memory starting at @addr, read through
 * cpu_memory_rw_debug() on the selected CPU, into the file @filename
 * (created or truncated, binary mode).
 *
 * @has_cpu/@cpu_index: CPU to read through; CPU 0 when @has_cpu is false.
 * @errp: receives an error when the CPU index is unknown, @size is
 *        negative, the file cannot be opened, or writing/closing the
 *        file fails.
 *
 * NOTE: the return value of cpu_memory_rw_debug() is not checked, so
 * the contents written for a chunk whose read failed are unspecified.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }
    env = cpu->env_ptr;

    /*
     * Reject negative sizes before touching the file: the chunk length
     * below is a uint32_t, so a negative size would be truncated to a
     * huge length and overrun buf.
     */
    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

    while (size != 0) {
        /* Copy in chunks of at most sizeof(buf) bytes. */
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_memory_rw_debug(env, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            fclose(f);
            return;
        }
        addr += l;
        size -= l;
    }

    /* fclose() flushes buffered data; a failure here means data loss. */
    if (fclose(f) != 0) {
        error_set(errp, QERR_IO_ERROR);
    }
}
1294

    
1295
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1296
                  Error **errp)
1297
{
1298
    FILE *f;
1299
    uint32_t l;
1300
    uint8_t buf[1024];
1301

    
1302
    f = fopen(filename, "wb");
1303
    if (!f) {
1304
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1305
        return;
1306
    }
1307

    
1308
    while (size != 0) {
1309
        l = sizeof(buf);
1310
        if (l > size)
1311
            l = size;
1312
        cpu_physical_memory_rw(addr, buf, l, 0);
1313
        if (fwrite(buf, 1, l, f) != l) {
1314
            error_set(errp, QERR_IO_ERROR);
1315
            goto exit;
1316
        }
1317
        addr += l;
1318
        size -= l;
1319
    }
1320

    
1321
exit:
1322
    fclose(f);
1323
}
1324

    
1325
/*
 * Inject an NMI into every CPU on the first_cpu list (TARGET_I386
 * builds only): through apic_deliver_nmi() when the CPU has an
 * apic_state, otherwise by raising CPU_INTERRUPT_NMI directly.
 * On other targets @errp is set to QERR_UNSUPPORTED.
 */
void qmp_inject_nmi(Error **errp)
{
#if !defined(TARGET_I386)
    error_set(errp, QERR_UNSUPPORTED);
#else
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->apic_state) {
            apic_deliver_nmi(env->apic_state);
            continue;
        }
        cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
    }
#endif
}