/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor.h"
#include "sysemu.h"
#include "gdbstub.h"
#include "dma.h"
#include "kvm.h"
#include "qmp-commands.h"

#include "qemu-thread.h"
#include "cpus.h"
#include "main-loop.h"

#ifndef _WIN32
#include "compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;

/***********************************************************/
/* guest cycle counter */

/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
/* Compensate for varying guest execution speed.  */
static int64_t qemu_icount_bias;
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
static int64_t vm_clock_warp_start;
static int64_t qemu_icount;

typedef struct TimersState {
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;
} TimersState;

TimersState timers_state;

/* Return the virtual CPU time, based on the instruction counter.  */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    CPUState *env = cpu_single_env;

    icount = qemu_icount;
    if (env) {
        if (!can_do_io(env)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (env->icount_decr.u16.low + env->icount_extra);
    }
    return qemu_icount_bias + (icount << icount_time_shift);
}

/* return the host CPU cycle counter and handle stop/restart */
int64_t cpu_get_ticks(void)
{
    if (use_icount) {
        return cpu_get_icount();
    }
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_ticks_offset;
    } else {
        int64_t ticks;
        ticks = cpu_get_real_ticks();
        if (timers_state.cpu_ticks_prev > ticks) {
            /* Note: non increasing ticks may happen if the host uses
               software suspend */
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        }
        timers_state.cpu_ticks_prev = ticks;
        return ticks + timers_state.cpu_ticks_offset;
    }
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_clock_offset;
    } else {
        ti = get_clock();
        return ti + timers_state.cpu_clock_offset;
    }
}

/* enable cpu_get_ticks() */
void cpu_enable_ticks(void)
{
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
   cpu_get_ticks() after that.  */
void cpu_disable_ticks(void)
{
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset = cpu_get_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock();
        timers_state.cpu_ticks_enabled = 0;
    }
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

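/* Periodically re-tune icount_time_shift: compare the virtual clock with
   the host clock and nudge the shift so that emulated time neither races
   ahead of nor lags behind real time.  */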
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;
    static int64_t last_delta;
    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }
    cur_time = cpu_get_clock();
    cur_icount = qemu_get_clock_ns(vm_clock);
    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
}

static void icount_adjust_rt(void *opaque)
{
    qemu_mod_timer(icount_rt_timer,
                   qemu_get_clock_ms(rt_clock) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    qemu_mod_timer(icount_vm_timer,
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
    icount_adjust();
}

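/* Round a vm_clock deadline, given in nanoseconds, up to a whole number
   of emulated instructions at the current icount_time_shift.  */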
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

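/* Callback of icount_warp_timer.  Folds the real time that has elapsed
   since vm_clock_warp_start into qemu_icount_bias, so that vm_clock
   catches up on time the idle vCPUs did not account for.  */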
static void icount_warp_rt(void *opaque)
{
    if (vm_clock_warp_start == -1) {
        return;
    }

    if (runstate_is_running()) {
        int64_t clock = qemu_get_clock_ns(rt_clock);
        int64_t warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 1) {
            qemu_icount_bias += warp_delta;
        } else {
            /*
             * In adaptive mode, do not let the vm_clock run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock();
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
            int64_t delta = cur_time - cur_icount;
            qemu_icount_bias += MIN(warp_delta, delta);
        }
        if (qemu_clock_expired(vm_clock)) {
            qemu_notify_event();
        }
    }
    vm_clock_warp_start = -1;
}

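/* Called when the vCPUs may go idle.  If they are all idle and a vm_clock
   timer is pending, arm an rt_clock timer so that vm_clock can be warped
   forward once a comparable amount of real time has passed.  */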
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
     * ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);
    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline > 0) {
        /*
         * Ensure the vm_clock proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * vm_clock.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending vm_clock timer; rather
         * time could just advance to the next vm_clock event.  Instead, we
         * do stop VCPUs and only advance vm_clock after some "real" time
         * (related to the time left until the next event) has passed.  The
         * rt_clock timer armed here does this.  This keeps the warps from
         * being too visible externally---for example, you will not be
         * sending network packets continuously instead of every 100ms.
         */
        qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
    } else {
        qemu_notify_event();
    }
}

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields      = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};

void configure_icount(const char *option)
{
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    if (!option) {
        return;
    }

    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
    qemu_mod_timer(icount_rt_timer,
                   qemu_get_clock_ms(rt_clock) + 1000);
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
    qemu_mod_timer(icount_vm_timer,
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *env;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
#ifdef TARGET_I386
        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
#else
        cpu_dump_state(env, stderr, fprintf, 0);
#endif
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

int cpu_is_stopped(CPUState *env)
{
    return !runstate_is_running() || env->stopped;
}

static void do_vm_stop(RunState state)
{
    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qemu_aio_flush();
        bdrv_flush_all();
        monitor_protocol_event(QEVENT_STOP, NULL);
    }
}

static int cpu_can_run(CPUState *env)
{
    if (env->stop) {
        return 0;
    }
    if (env->stopped || !runstate_is_running()) {
        return 0;
    }
    return 1;
}

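/* Decide whether a vCPU thread may go to sleep: it must have no pending
   stop request or queued work, and either be stopped or be halted with no
   work pending.  With an in-kernel irqchip, halt is handled inside KVM, so
   the thread never counts as idle here.  */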
static bool cpu_thread_is_idle(CPUState *env)
{
    if (env->stop || env->queued_work_first) {
        return false;
    }
    if (env->stopped || !runstate_is_running()) {
        return true;
    }
    if (!env->halted || qemu_cpu_has_work(env) ||
        (kvm_enabled() && kvm_irqchip_in_kernel())) {
        return false;
    }
    return true;
}

bool all_cpu_threads_idle(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        if (!cpu_thread_is_idle(env)) {
            return false;
        }
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();
    env->stopped = 1;
}

static void cpu_signal(int sig)
{
    if (cpu_single_env) {
        cpu_exit(cpu_single_env);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

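/* Drain pending SIG_IPI and SIGBUS without blocking.  A pending SIGBUS is
   handed to kvm_on_sigbus_vcpu(); if it cannot be handled there, it is
   re-raised with the default action.  */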
static void qemu_kvm_eat_signals(CPUState *env)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *env)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *env)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(env, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *env)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

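/* Run func(data) on env's vCPU thread and wait for it to complete.  If the
   caller already is that thread, the function runs inline.  The work item
   lives on this stack frame, which is safe because we block on
   qemu_work_cond until the item is marked done.  */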
void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(env)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    if (!env->queued_work_first) {
        env->queued_work_first = &wi;
    } else {
        env->queued_work_last->next = &wi;
    }
    env->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(env);
    while (!wi.done) {
        CPUState *self_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = self_env;
    }
}

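/* Execute all work items queued for env on its own thread and wake any
   run_on_cpu() callers waiting on qemu_work_cond.  */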
static void flush_queued_work(CPUState *env)
{
    struct qemu_work_item *wi;

    if (!env->queued_work_first) {
        return;
    }

    while ((wi = env->queued_work_first)) {
        env->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
    }
    env->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

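/* Common wake-up path for all vCPU threads: acknowledge a pending stop
   request and run any queued work.  Called with qemu_global_mutex held.  */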
static void qemu_wait_io_event_common(CPUState *env)
{
    if (env->stop) {
        env->stop = 0;
        env->stopped = 1;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(env);
    env->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUState *env;

    while (all_cpu_threads_idle()) {
       /* Start accounting real time to the virtual clock if the CPUs
          are idle.  */
        qemu_clock_warp(vm_clock);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        qemu_wait_io_event_common(env);
    }
}

static void qemu_kvm_wait_io_event(CPUState *env)
{
    while (cpu_thread_is_idle(env)) {
        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(env);
    qemu_wait_io_event_common(env);
}

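/* Main loop of a KVM vCPU thread: create the vCPU, set up its signal
   handling, then alternate between running the guest and sleeping until
   there is work to do.  */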
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *env = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(env->thread);
    env->thread_id = qemu_get_thread_id();

    r = kvm_init_vcpu(env);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(env);

    /* signal CPU creation */
    env->created = 1;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(env)) {
            r = kvm_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(env);
            }
        }
        qemu_kvm_wait_io_event(env);
    }

    return NULL;
}

static void tcg_exec_all(void);

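/* TCG runs all vCPUs round-robin on this single thread; tcg_exec_all()
   steps through them, and the thread sleeps whenever every vCPU is idle.  */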
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *env = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(env->thread);

    /* signal CPU creation */
    qemu_mutex_lock(&qemu_global_mutex);
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        env->thread_id = qemu_get_thread_id();
        env->created = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (1) {
        tcg_exec_all();
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
            qemu_notify_event();
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

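/* Force the vCPU thread out of guest code: deliver SIG_IPI on POSIX hosts;
   on Windows, briefly suspend the thread and raise the exit request via
   cpu_signal() instead.  */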
static void qemu_cpu_kick_thread(CPUState *env)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(env->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(env)) {
        SuspendThread(env->thread->thread);
        cpu_signal(0);
        ResumeThread(env->thread->thread);
    }
#endif
}

void qemu_cpu_kick(void *_env)
{
    CPUState *env = _env;

    qemu_cond_broadcast(env->halt_cond);
    if (kvm_enabled() && !env->thread_kicked) {
        qemu_cpu_kick_thread(env);
        env->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(cpu_single_env);

    if (!cpu_single_env->thread_kicked) {
        qemu_cpu_kick_thread(cpu_single_env);
        cpu_single_env->thread_kicked = true;
    }
#else
    abort();
#endif
}

int qemu_cpu_is_self(void *_env)
{
    CPUState *env = _env;

    return qemu_thread_is_self(env->thread);
}

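/* Acquire qemu_global_mutex from the iothread.  Under TCG the mutex may be
   held by the CPU thread for long stretches, so if trylock fails we kick
   the CPU thread out of its execution loop, and iothread_requesting_mutex
   makes the TCG thread wait for us instead of immediately retaking the
   lock.  */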
void qemu_mutex_lock_iothread(void)
{
    if (kvm_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        if (!penv->stopped) {
            return 0;
        }
        penv = (CPUState *)penv->next_cpu;
    }

    return 1;
}

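/* Request every vCPU to stop and block until all of them have acknowledged,
   re-kicking them while we wait in case a kick was consumed early.  */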
void pause_all_vcpus(void)
{
    CPUState *penv = first_cpu;

    qemu_clock_enable(vm_clock, false);
    while (penv) {
        penv->stop = 1;
        qemu_cpu_kick(penv);
        penv = (CPUState *)penv->next_cpu;
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        penv = first_cpu;
        while (penv) {
            qemu_cpu_kick(penv);
            penv = (CPUState *)penv->next_cpu;
        }
    }
}

void resume_all_vcpus(void)
{
    CPUState *penv = first_cpu;

    qemu_clock_enable(vm_clock, true);
    while (penv) {
        penv->stop = 0;
        penv->stopped = 0;
        qemu_cpu_kick(penv);
        penv = (CPUState *)penv->next_cpu;
    }
}

static void qemu_tcg_init_vcpu(void *_env)
{
    CPUState *env = _env;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        env->thread = g_malloc0(sizeof(QemuThread));
        env->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(env->halt_cond);
        tcg_halt_cond = env->halt_cond;
        qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env);
        while (env->created == 0) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = env->thread;
    } else {
        env->thread = tcg_cpu_thread;
        env->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *env)
{
    env->thread = g_malloc0(sizeof(QemuThread));
    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);
    qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env);
    while (env->created == 0) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(void *_env)
{
    CPUState *env = _env;

    env->nr_cores = smp_cores;
    env->nr_threads = smp_threads;
    env->stopped = 1;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(env);
    } else {
        qemu_tcg_init_vcpu(env);
    }
}

void cpu_stop_current(void)
{
    if (cpu_single_env) {
        cpu_single_env->stop = 0;
        cpu_single_env->stopped = 1;
        cpu_exit(cpu_single_env);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

void vm_stop(RunState state)
{
    if (!qemu_thread_is_self(&io_thread)) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return;
    }
    do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
void vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        vm_stop(state);
    } else {
        runstate_set(state);
    }
}

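/* Execute one TCG vCPU.  With icount enabled, first convert the time left
   until the next vm_clock deadline into an instruction budget, split across
   icount_decr.u16.low and icount_extra; afterwards, fold any unexecuted
   instructions back into qemu_icount.  */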
static int tcg_cpu_exec(CPUState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;
        count = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        env->icount_decr.u16.low = decr;
        env->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}

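/* Round-robin scheduler for TCG: run each vCPU in turn, starting from where
   the previous round left off, until an exit is requested or a vCPU stops.  */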
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to the vm_clock.  */
    qemu_clock_warp(vm_clock);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
        CPUState *env = next_cpu;

        qemu_clock_enable(vm_clock,
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(env)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(env);
                break;
            }
        } else if (env->stop || env->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void set_numa_modes(void)
{
    CPUState *env;
    int i;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        for (i = 0; i < nb_numa_nodes; i++) {
            if (node_cpumask[i] & (1 << env->cpu_index)) {
                env->numa_node = i;
            }
        }
    }
}

void set_cpu_log(const char *optarg)
{
    int mask;
    const CPULogItem *item;

    mask = cpu_str_to_log_mask(optarg);
    if (!mask) {
        printf("Log items (comma separated):\n");
        for (item = cpu_log_items; item->mask != 0; item++) {
            printf("%-10s %s\n", item->name, item->help);
        }
        exit(1);
    }
    cpu_set_log(mask);
}

void set_cpu_log_filename(const char *optarg)
{
    cpu_set_log_filename(optarg);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list_id)
    cpu_list_id(f, cpu_fprintf, optarg);
#elif defined(cpu_list)
    cpu_list(f, cpu_fprintf); /* deprecated */
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CpuInfoList *info;

        cpu_synchronize_state(env);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = env->cpu_index;
        info->value->current = (env == first_cpu);
        info->value->halted = env->halted;
        info->value->thread_id = env->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}