Statistics
| Branch: | Revision:

root / cpus.c @ c3affe56

History | View | Annotate | Download (33.1 kB)

1
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor/monitor.h"
29
#include "sysemu/sysemu.h"
30
#include "exec/gdbstub.h"
31
#include "sysemu/dma.h"
32
#include "sysemu/kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu/thread.h"
36
#include "sysemu/cpus.h"
37
#include "sysemu/qtest.h"
38
#include "qemu/main-loop.h"
39
#include "qemu/bitmap.h"
40

    
41
#ifndef _WIN32
42
#include "qemu/compatfd.h"
43
#endif
44

    
45
#ifdef CONFIG_LINUX
46

    
47
#include <sys/prctl.h>
48

    
49
#ifndef PR_MCE_KILL
50
#define PR_MCE_KILL 33
51
#endif
52

    
53
#ifndef PR_MCE_KILL_SET
54
#define PR_MCE_KILL_SET 1
55
#endif
56

    
57
#ifndef PR_MCE_KILL_EARLY
58
#define PR_MCE_KILL_EARLY 1
59
#endif
60

    
61
#endif /* CONFIG_LINUX */
62

    
63
/* NOTE(review): not referenced in the part of the file visible here;
   presumably a cursor into the CPU list used by the TCG execution loop
   further down -- TODO confirm.  */
static CPUArchState *next_cpu;
64

    
65
/*
 * Tell whether the vCPU behind @env currently has nothing to do.
 *
 * The checks are ordered: a pending stop request or queued work item makes
 * the vCPU busy; otherwise a stopped vCPU, or any vCPU while the VM is not
 * running, is idle; otherwise it is idle only when it is halted, has no
 * work according to qemu_cpu_has_work() and
 * kvm_async_interrupts_enabled() is false.
 */
static bool cpu_thread_is_idle(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);
    bool busy;

    /* Stop requests and queued work must be serviced by the thread.  */
    if (vcpu->stop || vcpu->queued_work_first) {
        return false;
    }
    if (vcpu->stopped || !runstate_is_running()) {
        return true;
    }
    busy = !vcpu->halted || qemu_cpu_has_work(vcpu) ||
           kvm_async_interrupts_enabled();
    return !busy;
}
81

    
82
static bool all_cpu_threads_idle(void)
83
{
84
    CPUArchState *env;
85

    
86
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
87
        if (!cpu_thread_is_idle(env)) {
88
            return false;
89
        }
90
    }
91
    return true;
92
}
93

    
94
/***********************************************************/
95
/* guest cycle counter */
96

    
97
/* Conversion factor from emulated instructions to virtual clock ticks.  */
98
static int icount_time_shift;
99
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
100
#define MAX_ICOUNT_SHIFT 10
101
/* Compensate for varying guest execution speed.  */
102
static int64_t qemu_icount_bias;
103
static QEMUTimer *icount_rt_timer;
104
static QEMUTimer *icount_vm_timer;
105
static QEMUTimer *icount_warp_timer;
106
static int64_t vm_clock_warp_start;
107
static int64_t qemu_icount;
108

    
109
/* Bookkeeping for the host tick counter and host clock as exposed through
   cpu_get_ticks() and cpu_get_clock().  */
typedef struct TimersState {
    int64_t cpu_ticks_prev;     /* last raw value seen by cpu_get_ticks() */
    int64_t cpu_ticks_offset;   /* correction added to the raw tick counter */
    int64_t cpu_clock_offset;   /* correction added to get_clock() */
    int32_t cpu_ticks_enabled;  /* non-zero while the counters are running */
    int64_t dummy;              /* unused by the code visible here, but
                                   listed in vmstate_timers */
} TimersState;

TimersState timers_state;
118

    
119
/* Return the virtual CPU time, based on the instruction counter.  */
120
int64_t cpu_get_icount(void)
121
{
122
    int64_t icount;
123
    CPUArchState *env = cpu_single_env;
124

    
125
    icount = qemu_icount;
126
    if (env) {
127
        if (!can_do_io(env)) {
128
            fprintf(stderr, "Bad clock read\n");
129
        }
130
        icount -= (env->icount_decr.u16.low + env->icount_extra);
131
    }
132
    return qemu_icount_bias + (icount << icount_time_shift);
133
}
134

    
135
/* return the host CPU cycle counter and handle stop/restart */
136
int64_t cpu_get_ticks(void)
137
{
138
    if (use_icount) {
139
        return cpu_get_icount();
140
    }
141
    if (!timers_state.cpu_ticks_enabled) {
142
        return timers_state.cpu_ticks_offset;
143
    } else {
144
        int64_t ticks;
145
        ticks = cpu_get_real_ticks();
146
        if (timers_state.cpu_ticks_prev > ticks) {
147
            /* Note: non increasing ticks may happen if the host uses
148
               software suspend */
149
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
150
        }
151
        timers_state.cpu_ticks_prev = ticks;
152
        return ticks + timers_state.cpu_ticks_offset;
153
    }
154
}
155

    
156
/* return the host CPU monotonic timer and handle stop/restart */
157
int64_t cpu_get_clock(void)
158
{
159
    int64_t ti;
160
    if (!timers_state.cpu_ticks_enabled) {
161
        return timers_state.cpu_clock_offset;
162
    } else {
163
        ti = get_clock();
164
        return ti + timers_state.cpu_clock_offset;
165
    }
166
}
167

    
168
/* enable cpu_get_ticks() */
169
void cpu_enable_ticks(void)
170
{
171
    if (!timers_state.cpu_ticks_enabled) {
172
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
173
        timers_state.cpu_clock_offset -= get_clock();
174
        timers_state.cpu_ticks_enabled = 1;
175
    }
176
}
177

    
178
/* disable cpu_get_ticks() : the clock is stopped. You must not call
179
   cpu_get_ticks() after that.  */
180
void cpu_disable_ticks(void)
181
{
182
    if (timers_state.cpu_ticks_enabled) {
183
        timers_state.cpu_ticks_offset = cpu_get_ticks();
184
        timers_state.cpu_clock_offset = cpu_get_clock();
185
        timers_state.cpu_ticks_enabled = 0;
186
    }
187
}
188

    
189
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variations.
   When the guest is idle, real and virtual time will be aligned in
   the IO wait loop.  */
193
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
194

    
195
static void icount_adjust(void)
196
{
197
    int64_t cur_time;
198
    int64_t cur_icount;
199
    int64_t delta;
200
    static int64_t last_delta;
201
    /* If the VM is not running, then do nothing.  */
202
    if (!runstate_is_running()) {
203
        return;
204
    }
205
    cur_time = cpu_get_clock();
206
    cur_icount = qemu_get_clock_ns(vm_clock);
207
    delta = cur_icount - cur_time;
208
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
209
    if (delta > 0
210
        && last_delta + ICOUNT_WOBBLE < delta * 2
211
        && icount_time_shift > 0) {
212
        /* The guest is getting too far ahead.  Slow time down.  */
213
        icount_time_shift--;
214
    }
215
    if (delta < 0
216
        && last_delta - ICOUNT_WOBBLE > delta * 2
217
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
218
        /* The guest is getting too far behind.  Speed time up.  */
219
        icount_time_shift++;
220
    }
221
    last_delta = delta;
222
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
223
}
224

    
225
static void icount_adjust_rt(void *opaque)
226
{
227
    qemu_mod_timer(icount_rt_timer,
228
                   qemu_get_clock_ms(rt_clock) + 1000);
229
    icount_adjust();
230
}
231

    
232
static void icount_adjust_vm(void *opaque)
233
{
234
    qemu_mod_timer(icount_vm_timer,
235
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
236
    icount_adjust();
237
}
238

    
239
static int64_t qemu_icount_round(int64_t count)
240
{
241
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
242
}
243

    
244
static void icount_warp_rt(void *opaque)
245
{
246
    if (vm_clock_warp_start == -1) {
247
        return;
248
    }
249

    
250
    if (runstate_is_running()) {
251
        int64_t clock = qemu_get_clock_ns(rt_clock);
252
        int64_t warp_delta = clock - vm_clock_warp_start;
253
        if (use_icount == 1) {
254
            qemu_icount_bias += warp_delta;
255
        } else {
256
            /*
257
             * In adaptive mode, do not let the vm_clock run too
258
             * far ahead of real time.
259
             */
260
            int64_t cur_time = cpu_get_clock();
261
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
262
            int64_t delta = cur_time - cur_icount;
263
            qemu_icount_bias += MIN(warp_delta, delta);
264
        }
265
        if (qemu_clock_expired(vm_clock)) {
266
            qemu_notify_event();
267
        }
268
    }
269
    vm_clock_warp_start = -1;
270
}
271

    
272
void qtest_clock_warp(int64_t dest)
273
{
274
    int64_t clock = qemu_get_clock_ns(vm_clock);
275
    assert(qtest_enabled());
276
    while (clock < dest) {
277
        int64_t deadline = qemu_clock_deadline(vm_clock);
278
        int64_t warp = MIN(dest - clock, deadline);
279
        qemu_icount_bias += warp;
280
        qemu_run_timers(vm_clock);
281
        clock = qemu_get_clock_ns(vm_clock);
282
    }
283
    qemu_notify_event();
284
}
285

    
286
/*
 * Arrange for the vm_clock to keep advancing while all vCPUs are idle in
 * icount mode.  Calls for any clock other than vm_clock, or with icount
 * disabled, return immediately.
 */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * The warp machinery relies on too many globals to be applied to
     * other clocks; taking a clock argument merely spares callers from
     * sprinkling if statements everywhere.
     */
    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now so
     * that the deadline computed below is correct.  This also
     * synchronizes the insn counter before the CPU starts running, in case
     * it is woken by something other than the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);
    if (!(all_cpu_threads_idle() && qemu_clock_has_timers(vm_clock))) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline <= 0) {
        qemu_notify_event();
        return;
    }

    /*
     * Make sure the vm_clock proceeds even when the virtual CPU goes to
     * sleep.  Otherwise the CPU might wait for a future timer interrupt
     * that never arrives, because a vCPU that executes no insns does not
     * advance the vm_clock.
     *
     * An extreme solution would be to never let VCPUs sleep in icount
     * mode while a vm_clock timer is pending and simply jump time to the
     * next vm_clock event.  Instead, VCPUs do stop and vm_clock is only
     * advanced after some "real" time (related to the time left until
     * the next event) has passed; this rt_clock timer does that.  It
     * keeps the warps from being too visible externally---for example,
     * you will not be sending network packets continuously instead of
     * every 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
}
341

    
342
static const VMStateDescription vmstate_timers = {
343
    .name = "timer",
344
    .version_id = 2,
345
    .minimum_version_id = 1,
346
    .minimum_version_id_old = 1,
347
    .fields      = (VMStateField[]) {
348
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
349
        VMSTATE_INT64(dummy, TimersState),
350
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
351
        VMSTATE_END_OF_LIST()
352
    }
353
};
354

    
355
void configure_icount(const char *option)
356
{
357
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
358
    if (!option) {
359
        return;
360
    }
361

    
362
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
363
    if (strcmp(option, "auto") != 0) {
364
        icount_time_shift = strtol(option, NULL, 0);
365
        use_icount = 1;
366
        return;
367
    }
368

    
369
    use_icount = 2;
370

    
371
    /* 125MIPS seems a reasonable initial guess at the guest speed.
372
       It will be corrected fairly quickly anyway.  */
373
    icount_time_shift = 3;
374

    
375
    /* Have both realtime and virtual time triggers for speed adjustment.
376
       The realtime trigger catches emulated time passing too slowly,
377
       the virtual time trigger catches emulated time passing too fast.
378
       Realtime triggers occur even when idle, so use them less frequently
379
       than VM triggers.  */
380
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
381
    qemu_mod_timer(icount_rt_timer,
382
                   qemu_get_clock_ms(rt_clock) + 1000);
383
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
384
    qemu_mod_timer(icount_vm_timer,
385
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
386
}
387

    
388
/***********************************************************/
389
void hw_error(const char *fmt, ...)
390
{
391
    va_list ap;
392
    CPUArchState *env;
393
    CPUState *cpu;
394

    
395
    va_start(ap, fmt);
396
    fprintf(stderr, "qemu: hardware error: ");
397
    vfprintf(stderr, fmt, ap);
398
    fprintf(stderr, "\n");
399
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
400
        cpu = ENV_GET_CPU(env);
401
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
402
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
403
    }
404
    va_end(ap);
405
    abort();
406
}
407

    
408
void cpu_synchronize_all_states(void)
409
{
410
    CPUArchState *cpu;
411

    
412
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
413
        cpu_synchronize_state(cpu);
414
    }
415
}
416

    
417
void cpu_synchronize_all_post_reset(void)
418
{
419
    CPUArchState *cpu;
420

    
421
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
422
        cpu_synchronize_post_reset(cpu);
423
    }
424
}
425

    
426
void cpu_synchronize_all_post_init(void)
427
{
428
    CPUArchState *cpu;
429

    
430
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
431
        cpu_synchronize_post_init(cpu);
432
    }
433
}
434

    
435
/* A CPU counts as stopped when the VM is not running or when its own
   stopped flag is set.  */
bool cpu_is_stopped(CPUState *cpu)
{
    if (!runstate_is_running()) {
        return true;
    }
    return cpu->stopped;
}
439

    
440
/*
 * Stop a running VM and enter run state @state; a no-op when the VM is not
 * running.  In order: ticks are disabled, all vCPUs are paused, the run
 * state is set and announced to the vm-state notifiers, block devices are
 * drained and flushed, and a QEVENT_STOP monitor event is emitted.
 */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
452

    
453
/* A vCPU may execute guest code only if no stop is requested, it is not
   stopped, and the VM is running.  */
static bool cpu_can_run(CPUState *cpu)
{
    bool blocked = cpu->stop || cpu->stopped || !runstate_is_running();

    return !blocked;
}
463

    
464
/* Handle a debug exit from the vCPU behind @env: record it as the CPU that
   stopped for gdb, request a debug stop of the system, and mark the vCPU
   stopped.  */
static void cpu_handle_guest_debug(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    gdb_set_stop_cpu(env);
    qemu_system_debug_request();

    vcpu->stopped = true;
}
472

    
473
static void cpu_signal(int sig)
474
{
475
    if (cpu_single_env) {
476
        cpu_exit(cpu_single_env);
477
    }
478
    exit_request = 1;
479
}
480

    
481
#ifdef CONFIG_LINUX
482
static void sigbus_reraise(void)
483
{
484
    sigset_t set;
485
    struct sigaction action;
486

    
487
    memset(&action, 0, sizeof(action));
488
    action.sa_handler = SIG_DFL;
489
    if (!sigaction(SIGBUS, &action, NULL)) {
490
        raise(SIGBUS);
491
        sigemptyset(&set);
492
        sigaddset(&set, SIGBUS);
493
        sigprocmask(SIG_UNBLOCK, &set, NULL);
494
    }
495
    perror("Failed to re-raise SIGBUS!\n");
496
    abort();
497
}
498

    
499
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
500
                           void *ctx)
501
{
502
    if (kvm_on_sigbus(siginfo->ssi_code,
503
                      (void *)(intptr_t)siginfo->ssi_addr)) {
504
        sigbus_reraise();
505
    }
506
}
507

    
508
static void qemu_init_sigbus(void)
509
{
510
    struct sigaction action;
511

    
512
    memset(&action, 0, sizeof(action));
513
    action.sa_flags = SA_SIGINFO;
514
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
515
    sigaction(SIGBUS, &action, NULL);
516

    
517
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
518
}
519

    
520
/*
 * Consume pending SIG_IPI and SIGBUS signals of the calling thread without
 * blocking (zero timeout).
 *
 * A dequeued SIGBUS is handed to kvm_on_sigbus_vcpu() and re-raised when
 * that returns non-zero.  The loop repeats while either signal is still
 * pending.  Unexpected sigtimedwait()/sigpending() failures terminate the
 * process.
 */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t wanted;
    sigset_t pending;
    int r;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    for (;;) {
        r = sigtimedwait(&wanted, &info, &no_wait);
        if (r == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (r == SIGBUS) {
            if (kvm_on_sigbus_vcpu(cpu, info.si_code, info.si_addr)) {
                sigbus_reraise();
            }
        }

        r = sigpending(&pending);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }

        if (!(sigismember(&pending, SIG_IPI) ||
              sigismember(&pending, SIGBUS))) {
            break;
        }
    }
}
556

    
557
#else /* !CONFIG_LINUX */
558

    
559
/* Non-Linux build: there is no SIGBUS setup to perform.  */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
562

    
563
/* Non-Linux build: no signals are consumed.  */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    /* intentionally empty */
}
566
#endif /* !CONFIG_LINUX */
567

    
568
#ifndef _WIN32
569
/* Do-nothing signal handler; installed for SIG_IPI by
   qemu_kvm_init_cpu_signals().  */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
572

    
573
/*
 * Set up signal handling for a KVM vCPU thread.
 *
 * A no-op handler is installed for SIG_IPI.  The calling thread's current
 * signal mask is then fetched, SIG_IPI and SIGBUS are removed from it, and
 * the result is passed to kvm_set_signal_mask().  A failure there is
 * reported on stderr and terminates the process.
 */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    struct sigaction ipi_action;
    sigset_t mask;
    int r;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* A NULL new set only queries the current mask.  */
    pthread_sigmask(SIG_BLOCK, NULL, &mask);
    sigdelset(&mask, SIG_IPI);
    sigdelset(&mask, SIGBUS);

    r = kvm_set_signal_mask(env, &mask);
    if (r == 0) {
        return;
    }
    fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
    exit(1);
}
592

    
593
static void qemu_tcg_init_cpu_signals(void)
594
{
595
    sigset_t set;
596
    struct sigaction sigact;
597

    
598
    memset(&sigact, 0, sizeof(sigact));
599
    sigact.sa_handler = cpu_signal;
600
    sigaction(SIG_IPI, &sigact, NULL);
601

    
602
    sigemptyset(&set);
603
    sigaddset(&set, SIG_IPI);
604
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
605
}
606

    
607
#else /* _WIN32 */
608
/* Win32 build: this path is not supported; reaching it aborts.  */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    abort();
}
612

    
613
/* Win32 build: no signal setup is performed for the TCG thread.  */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
616
#endif /* _WIN32 */
617

    
618
static QemuMutex qemu_global_mutex;
619
static QemuCond qemu_io_proceeded_cond;
620
static bool iothread_requesting_mutex;
621

    
622
static QemuThread io_thread;
623

    
624
static QemuThread *tcg_cpu_thread;
625
static QemuCond *tcg_halt_cond;
626

    
627
/* cpu creation */
628
static QemuCond qemu_cpu_cond;
629
/* system init */
630
static QemuCond qemu_pause_cond;
631
static QemuCond qemu_work_cond;
632

    
633
void qemu_init_cpu_loop(void)
634
{
635
    qemu_init_sigbus();
636
    qemu_cond_init(&qemu_cpu_cond);
637
    qemu_cond_init(&qemu_pause_cond);
638
    qemu_cond_init(&qemu_work_cond);
639
    qemu_cond_init(&qemu_io_proceeded_cond);
640
    qemu_mutex_init(&qemu_global_mutex);
641

    
642
    qemu_thread_get_self(&io_thread);
643
}
644

    
645
/*
 * Run @func(@data) in the context of @cpu's thread and wait for it to
 * finish.
 *
 * When already on that thread the function is called directly.  Otherwise
 * a stack-allocated work item is appended to the CPU's work queue, the CPU
 * is kicked, and the caller sleeps on qemu_work_cond (with
 * qemu_global_mutex, which the caller is therefore expected to hold) until
 * the item is marked done.  cpu_single_env is restored after every wait.
 */
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item item;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    item.func = func;
    item.data = data;
    item.next = NULL;
    item.done = false;

    /* Append to the tail of the CPU's queue.  */
    if (cpu->queued_work_first != NULL) {
        cpu->queued_work_last->next = &item;
    } else {
        cpu->queued_work_first = &item;
    }
    cpu->queued_work_last = &item;

    qemu_cpu_kick(cpu);
    while (!item.done) {
        CPUArchState *saved_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
673

    
674
/*
 * Run every work item queued on @cpu, in queue order.  Each item is
 * unlinked before its function is called and marked done afterwards.  When
 * at least one item was processed, the tail pointer is reset and waiters
 * on qemu_work_cond are woken.
 */
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *item = cpu->queued_work_first;

    if (item == NULL) {
        return;
    }

    do {
        cpu->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
        /* Re-read the head: it is the queue, not the item, that decides
           what runs next.  */
        item = cpu->queued_work_first;
    } while (item != NULL);

    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
690

    
691
/* Per-vCPU housekeeping after a wait: turn a pending stop request into the
   stopped state (signalling qemu_pause_cond), run queued work, and clear
   the thread_kicked flag.  */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    bool stop_requested = cpu->stop;

    if (stop_requested) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }

    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
701

    
702
static void qemu_tcg_wait_io_event(void)
703
{
704
    CPUArchState *env;
705

    
706
    while (all_cpu_threads_idle()) {
707
       /* Start accounting real time to the virtual clock if the CPUs
708
          are idle.  */
709
        qemu_clock_warp(vm_clock);
710
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
711
    }
712

    
713
    while (iothread_requesting_mutex) {
714
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
715
    }
716

    
717
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
718
        qemu_wait_io_event_common(ENV_GET_CPU(env));
719
    }
720
}
721

    
722
/* Wait step of a KVM vCPU thread: sleep on the vCPU's halt condition while
   it is idle, then consume pending signals and run the common
   housekeeping.  */
static void qemu_kvm_wait_io_event(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    for (; cpu_thread_is_idle(env); ) {
        qemu_cond_wait(vcpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(vcpu);
    qemu_wait_io_event_common(vcpu);
}
733

    
734
static void *qemu_kvm_cpu_thread_fn(void *arg)
735
{
736
    CPUArchState *env = arg;
737
    CPUState *cpu = ENV_GET_CPU(env);
738
    int r;
739

    
740
    qemu_mutex_lock(&qemu_global_mutex);
741
    qemu_thread_get_self(cpu->thread);
742
    cpu->thread_id = qemu_get_thread_id();
743
    cpu_single_env = env;
744

    
745
    r = kvm_init_vcpu(cpu);
746
    if (r < 0) {
747
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
748
        exit(1);
749
    }
750

    
751
    qemu_kvm_init_cpu_signals(env);
752

    
753
    /* signal CPU creation */
754
    cpu->created = true;
755
    qemu_cond_signal(&qemu_cpu_cond);
756

    
757
    while (1) {
758
        if (cpu_can_run(cpu)) {
759
            r = kvm_cpu_exec(env);
760
            if (r == EXCP_DEBUG) {
761
                cpu_handle_guest_debug(env);
762
            }
763
        }
764
        qemu_kvm_wait_io_event(env);
765
    }
766

    
767
    return NULL;
768
}
769

    
770
static void *qemu_dummy_cpu_thread_fn(void *arg)
771
{
772
#ifdef _WIN32
773
    fprintf(stderr, "qtest is not supported under Windows\n");
774
    exit(1);
775
#else
776
    CPUArchState *env = arg;
777
    CPUState *cpu = ENV_GET_CPU(env);
778
    sigset_t waitset;
779
    int r;
780

    
781
    qemu_mutex_lock_iothread();
782
    qemu_thread_get_self(cpu->thread);
783
    cpu->thread_id = qemu_get_thread_id();
784

    
785
    sigemptyset(&waitset);
786
    sigaddset(&waitset, SIG_IPI);
787

    
788
    /* signal CPU creation */
789
    cpu->created = true;
790
    qemu_cond_signal(&qemu_cpu_cond);
791

    
792
    cpu_single_env = env;
793
    while (1) {
794
        cpu_single_env = NULL;
795
        qemu_mutex_unlock_iothread();
796
        do {
797
            int sig;
798
            r = sigwait(&waitset, &sig);
799
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
800
        if (r == -1) {
801
            perror("sigwait");
802
            exit(1);
803
        }
804
        qemu_mutex_lock_iothread();
805
        cpu_single_env = env;
806
        qemu_wait_io_event_common(cpu);
807
    }
808

    
809
    return NULL;
810
#endif
811
}
812

    
813
/* Forward declaration: called by qemu_tcg_cpu_thread_fn() below; the
   definition is not within the part of the file visible here.  */
static void tcg_exec_all(void);
814

    
815
static void *qemu_tcg_cpu_thread_fn(void *arg)
816
{
817
    CPUState *cpu = arg;
818
    CPUArchState *env;
819

    
820
    qemu_tcg_init_cpu_signals();
821
    qemu_thread_get_self(cpu->thread);
822

    
823
    /* signal CPU creation */
824
    qemu_mutex_lock(&qemu_global_mutex);
825
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
826
        cpu = ENV_GET_CPU(env);
827
        cpu->thread_id = qemu_get_thread_id();
828
        cpu->created = true;
829
    }
830
    qemu_cond_signal(&qemu_cpu_cond);
831

    
832
    /* wait for initial kick-off after machine start */
833
    while (ENV_GET_CPU(first_cpu)->stopped) {
834
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
835

    
836
        /* process any pending work */
837
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
838
            qemu_wait_io_event_common(ENV_GET_CPU(env));
839
        }
840
    }
841

    
842
    while (1) {
843
        tcg_exec_all();
844
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
845
            qemu_notify_event();
846
        }
847
        qemu_tcg_wait_io_event();
848
    }
849

    
850
    return NULL;
851
}
852

    
853
/*
 * Interrupt the thread that runs @cpu.
 *
 * POSIX: send SIG_IPI to the thread; a pthread_kill() failure is fatal.
 * Win32: unless called from that thread itself, suspend it, run
 * cpu_signal() on its behalf, and resume it.
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        /* Terminate the line so the message is not run together with
           whatever is printed to stderr next.  */
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        SuspendThread(cpu->hThread);
        cpu_signal(0);
        ResumeThread(cpu->hThread);
    }
#endif
}
871

    
872
/* Wake @cpu: broadcast its halt condition and, when TCG is not enabled and
   the thread has not already been kicked, interrupt the thread and
   remember that it was kicked.  */
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);

    if (tcg_enabled() || cpu->thread_kicked) {
        return;
    }
    qemu_cpu_kick_thread(cpu);
    cpu->thread_kicked = true;
}
880

    
881
void qemu_cpu_kick_self(void)
882
{
883
#ifndef _WIN32
884
    assert(cpu_single_env);
885
    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
886

    
887
    if (!cpu_single_cpu->thread_kicked) {
888
        qemu_cpu_kick_thread(cpu_single_cpu);
889
        cpu_single_cpu->thread_kicked = true;
890
    }
891
#else
892
    abort();
893
#endif
894
}
895

    
896
/* True when the calling thread is the thread recorded for @cpu.  */
bool qemu_cpu_is_self(CPUState *cpu)
{
    bool on_own_thread = qemu_thread_is_self(cpu->thread);

    return on_own_thread;
}
900

    
901
static bool qemu_in_vcpu_thread(void)
902
{
903
    return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
904
}
905

    
906
void qemu_mutex_lock_iothread(void)
907
{
908
    if (!tcg_enabled()) {
909
        qemu_mutex_lock(&qemu_global_mutex);
910
    } else {
911
        iothread_requesting_mutex = true;
912
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
913
            qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
914
            qemu_mutex_lock(&qemu_global_mutex);
915
        }
916
        iothread_requesting_mutex = false;
917
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
918
    }
919
}
920

    
921
void qemu_mutex_unlock_iothread(void)
922
{
923
    qemu_mutex_unlock(&qemu_global_mutex);
924
}
925

    
926
static int all_vcpus_paused(void)
927
{
928
    CPUArchState *penv = first_cpu;
929

    
930
    while (penv) {
931
        CPUState *pcpu = ENV_GET_CPU(penv);
932
        if (!pcpu->stopped) {
933
            return 0;
934
        }
935
        penv = penv->next_cpu;
936
    }
937

    
938
    return 1;
939
}
940

    
941
void pause_all_vcpus(void)
942
{
943
    CPUArchState *penv = first_cpu;
944

    
945
    qemu_clock_enable(vm_clock, false);
946
    while (penv) {
947
        CPUState *pcpu = ENV_GET_CPU(penv);
948
        pcpu->stop = true;
949
        qemu_cpu_kick(pcpu);
950
        penv = penv->next_cpu;
951
    }
952

    
953
    if (qemu_in_vcpu_thread()) {
954
        cpu_stop_current();
955
        if (!kvm_enabled()) {
956
            while (penv) {
957
                CPUState *pcpu = ENV_GET_CPU(penv);
958
                pcpu->stop = 0;
959
                pcpu->stopped = true;
960
                penv = penv->next_cpu;
961
            }
962
            return;
963
        }
964
    }
965

    
966
    while (!all_vcpus_paused()) {
967
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
968
        penv = first_cpu;
969
        while (penv) {
970
            qemu_cpu_kick(ENV_GET_CPU(penv));
971
            penv = penv->next_cpu;
972
        }
973
    }
974
}
975

    
976
void resume_all_vcpus(void)
977
{
978
    CPUArchState *penv = first_cpu;
979

    
980
    qemu_clock_enable(vm_clock, true);
981
    while (penv) {
982
        CPUState *pcpu = ENV_GET_CPU(penv);
983
        pcpu->stop = false;
984
        pcpu->stopped = false;
985
        qemu_cpu_kick(pcpu);
986
        penv = penv->next_cpu;
987
    }
988
}
989

    
990
/*
 * Attach @cpu to the TCG execution thread.
 *
 * All TCG vCPUs share one thread and one halt condition.  The first call
 * allocates both, starts qemu_tcg_cpu_thread_fn() and waits on
 * qemu_cpu_cond until the thread has marked the CPU as created; later
 * calls simply reuse the existing thread and condition.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    /* share a single thread for all cpus with TCG */
    if (tcg_cpu_thread) {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
        return;
    }

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    tcg_halt_cond = cpu->halt_cond;
    qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    for (; !cpu->created; ) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
    tcg_cpu_thread = cpu->thread;
}
1012

    
1013
/* Allocate the thread and halt condition for a KVM vCPU, start
   qemu_kvm_cpu_thread_fn() on it, and wait on qemu_cpu_cond until the
   thread reports the CPU as created.  */
static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    vcpu->thread = g_malloc0(sizeof(QemuThread));
    vcpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(vcpu->halt_cond);

    qemu_thread_create(vcpu->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    for (; !vcpu->created; ) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1026

    
1027
/* Allocate the thread and halt condition for a dummy vCPU, start
   qemu_dummy_cpu_thread_fn() on it, and wait on qemu_cpu_cond until the
   thread reports the CPU as created.  */
static void qemu_dummy_start_vcpu(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    vcpu->thread = g_malloc0(sizeof(QemuThread));
    vcpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(vcpu->halt_cond);

    qemu_thread_create(vcpu->thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    for (; !vcpu->created; ) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1040

    
1041
void qemu_init_vcpu(void *_env)
1042
{
1043
    CPUArchState *env = _env;
1044
    CPUState *cpu = ENV_GET_CPU(env);
1045

    
1046
    cpu->nr_cores = smp_cores;
1047
    cpu->nr_threads = smp_threads;
1048
    cpu->stopped = true;
1049
    if (kvm_enabled()) {
1050
        qemu_kvm_start_vcpu(env);
1051
    } else if (tcg_enabled()) {
1052
        qemu_tcg_init_vcpu(cpu);
1053
    } else {
1054
        qemu_dummy_start_vcpu(env);
1055
    }
1056
}
1057

    
1058
void cpu_stop_current(void)
1059
{
1060
    if (cpu_single_env) {
1061
        CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
1062
        cpu_single_cpu->stop = false;
1063
        cpu_single_cpu->stopped = true;
1064
        cpu_exit(cpu_single_env);
1065
        qemu_cond_signal(&qemu_pause_cond);
1066
    }
1067
}
1068

    
1069
/*
 * Stop the VM and enter @state.
 *
 * Outside a vcpu thread the stop is carried out directly through
 * do_vm_stop().  Inside a vcpu thread it is only requested via
 * qemu_system_vmstop_request() and the current CPU is stopped.
 */
void vm_stop(RunState state)
{
    if (!qemu_in_vcpu_thread()) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
1082

    
1083
/* does a state transition even if the VM is already stopped,
1084
   current state is forgotten forever */
1085
void vm_stop_force_state(RunState state)
1086
{
1087
    if (runstate_is_running()) {
1088
        vm_stop(state);
1089
    } else {
1090
        runstate_set(state);
1091
    }
1092
}
1093

    
1094
/*
 * Run @env through cpu_exec() once and return its result.
 *
 * With use_icount set, the instruction budget is re-armed before the
 * call: whatever is left in icount_decr.u16.low / icount_extra is taken
 * back out of qemu_icount, a new count is derived from the vm_clock
 * deadline, and it is split into a 16-bit part (at most 0xffff) and the
 * remainder.  After the call the unused part is subtracted again.
 * With CONFIG_PROFILER the time spent in cpu_exec() is added to
 * qemu_time.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t budget;
        int low_part;

        /* take back what the previous slice left unused */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        budget = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += budget;

        /* the low counter is 16 bits wide; the rest goes to icount_extra */
        if (budget > 0xffff) {
            low_part = 0xffff;
        } else {
            low_part = budget;
        }
        env->icount_decr.u16.low = low_part;
        env->icount_extra = budget - low_part;
    }

    ret = cpu_exec(env);

#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}
1131

    
1132
static void tcg_exec_all(void)
1133
{
1134
    int r;
1135

    
1136
    /* Account partial waits to the vm_clock.  */
1137
    qemu_clock_warp(vm_clock);
1138

    
1139
    if (next_cpu == NULL) {
1140
        next_cpu = first_cpu;
1141
    }
1142
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1143
        CPUArchState *env = next_cpu;
1144
        CPUState *cpu = ENV_GET_CPU(env);
1145

    
1146
        qemu_clock_enable(vm_clock,
1147
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1148

    
1149
        if (cpu_can_run(cpu)) {
1150
            r = tcg_cpu_exec(env);
1151
            if (r == EXCP_DEBUG) {
1152
                cpu_handle_guest_debug(env);
1153
                break;
1154
            }
1155
        } else if (cpu->stop || cpu->stopped) {
1156
            break;
1157
        }
1158
    }
1159
    exit_request = 0;
1160
}
1161

    
1162
void set_numa_modes(void)
1163
{
1164
    CPUArchState *env;
1165
    CPUState *cpu;
1166
    int i;
1167

    
1168
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1169
        cpu = ENV_GET_CPU(env);
1170
        for (i = 0; i < nb_numa_nodes; i++) {
1171
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1172
                cpu->numa_node = i;
1173
            }
1174
        }
1175
    }
1176
}
1177

    
1178
/*
 * Forward @f and @cpu_fprintf to the target's cpu_list() when the
 * target defines that macro; otherwise do nothing.  @optarg is not
 * used here.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list
    cpu_list(f, cpu_fprintf);
#endif
}
1185

    
1186
/*
 * Build a newly allocated CpuInfoList with one entry per CPU in the
 * first_cpu list, in list order, and return its head (NULL when there
 * are no CPUs).  Each CPU's state is synchronised before its fields are
 * read.  Which program-counter field is filled in depends on the
 * target.  @errp is not written by this function.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CPUState *cpu = ENV_GET_CPU(env);
        CpuInfoList *entry;

        cpu_synchronize_state(env);

        entry = g_malloc0(sizeof(*entry));
        entry->value = g_malloc0(sizeof(*entry->value));
        entry->value->CPU = cpu->cpu_index;
        entry->value->current = (env == first_cpu);
        entry->value->halted = cpu->halted;
        entry->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        entry->value->has_pc = true;
        entry->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        entry->value->has_nip = true;
        entry->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        entry->value->has_pc = true;
        entry->value->pc = env->pc;
        entry->value->has_npc = true;
        entry->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        entry->value->has_PC = true;
        entry->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        /* append at the tail; entry->next is already NULL from g_malloc0 */
        *tail = entry;
        tail = &entry->next;
    }

    return head;
}
1230

    
1231
/*
 * Dump @size bytes starting at address @addr, read through
 * cpu_memory_rw_debug() on the selected CPU, into the file @filename.
 *
 * @has_cpu / @cpu_index select the CPU; CPU 0 is used when @has_cpu is
 * false.  The data is copied in chunks of at most sizeof(buf) bytes.
 *
 * Errors reported through @errp:
 *  - QERR_INVALID_PARAMETER_VALUE if @cpu_index names no CPU or @size is
 *    negative (a negative size would otherwise be truncated to a huge
 *    32-bit chunk length and overrun the stack buffer);
 *  - QERR_OPEN_FILE_FAILED if @filename cannot be opened for writing;
 *  - QERR_IO_ERROR if writing or closing the file fails.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    CPUState *cpu;
    uint8_t buf[1024];
    bool failed = false;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }
    env = cpu->env_ptr;

    /* reject before the file is created or truncated */
    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        /*
         * NOTE(review): the result of cpu_memory_rw_debug() is not
         * checked; confirm whether a failed read should abort the dump.
         */
        cpu_memory_rw_debug(env, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            failed = true;
            break;
        }
        addr += l;
        size -= l;
    }

    /*
     * fclose() flushes buffered data, so it can be the first place a
     * write error shows up.  Only report it if no error was set yet.
     */
    if (fclose(f) != 0 && !failed) {
        error_set(errp, QERR_IO_ERROR);
    }
}
1274

    
1275
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1276
                  Error **errp)
1277
{
1278
    FILE *f;
1279
    uint32_t l;
1280
    uint8_t buf[1024];
1281

    
1282
    f = fopen(filename, "wb");
1283
    if (!f) {
1284
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1285
        return;
1286
    }
1287

    
1288
    while (size != 0) {
1289
        l = sizeof(buf);
1290
        if (l > size)
1291
            l = size;
1292
        cpu_physical_memory_rw(addr, buf, l, 0);
1293
        if (fwrite(buf, 1, l, f) != l) {
1294
            error_set(errp, QERR_IO_ERROR);
1295
            goto exit;
1296
        }
1297
        addr += l;
1298
        size -= l;
1299
    }
1300

    
1301
exit:
1302
    fclose(f);
1303
}
1304

    
1305
/*
 * Deliver an NMI to every CPU in the first_cpu list.
 *
 * Only implemented for TARGET_I386: a CPU with an apic_state gets the
 * NMI through apic_deliver_nmi(), any other CPU through
 * cpu_interrupt() with CPU_INTERRUPT_NMI.  On every other target
 * QERR_UNSUPPORTED is reported through @errp.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        if (env->apic_state) {
            apic_deliver_nmi(env->apic_state);
        } else {
            cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
        }
        env = env->next_cpu;
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}