Statistics
| Branch: | Revision:

root / cpus.c @ 23b96cdb

History | View | Annotate | Download (30.1 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor.h"
29
#include "sysemu.h"
30
#include "gdbstub.h"
31
#include "dma.h"
32
#include "kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu-thread.h"
36
#include "cpus.h"
37
#include "main-loop.h"
38

    
39
#ifndef _WIN32
40
#include "compatfd.h"
41
#endif
42

    
43
#if defined(CONFIG_LINUX)

#include <sys/prctl.h>

/* Supply the PR_MCE_* constants used by qemu_init_sigbus() when
   <sys/prctl.h> does not define them itself.  */
#if !defined(PR_MCE_KILL)
#define PR_MCE_KILL 33
#endif

#if !defined(PR_MCE_KILL_SET)
#define PR_MCE_KILL_SET 1
#endif

#if !defined(PR_MCE_KILL_EARLY)
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
60

    
61
static CPUState *next_cpu;
62

    
63
/***********************************************************/
64
/* guest cycle counter */
65

    
66
/* Conversion factor from emulated instructions to virtual clock ticks.  */
67
static int icount_time_shift;
68
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
69
#define MAX_ICOUNT_SHIFT 10
70
/* Compensate for varying guest execution speed.  */
71
static int64_t qemu_icount_bias;
72
static QEMUTimer *icount_rt_timer;
73
static QEMUTimer *icount_vm_timer;
74
static QEMUTimer *icount_warp_timer;
75
static int64_t vm_clock_warp_start;
76
static int64_t qemu_icount;
77

    
78
/* Bookkeeping for the host-tick and host-clock based guest timers.  */
typedef struct TimersState {
    /* Last raw value returned by cpu_get_real_ticks() in cpu_get_ticks().  */
    int64_t cpu_ticks_prev;
    /* Offset added to the raw tick count; holds the frozen tick value
       while ticks are disabled.  */
    int64_t cpu_ticks_offset;
    /* Offset added to get_clock(); holds the frozen clock value while
       ticks are disabled.  */
    int64_t cpu_clock_offset;
    /* Non-zero while the tick counter and clock are running.  */
    int32_t cpu_ticks_enabled;
    /* Only referenced by the "timer" vmstate description in this file.  */
    int64_t dummy;
} TimersState;

/* The single global instance, registered for migration by
   configure_icount().  */
TimersState timers_state;
87

    
88
/* Return the virtual CPU time, based on the instruction counter.  */
89
int64_t cpu_get_icount(void)
90
{
91
    int64_t icount;
92
    CPUState *env = cpu_single_env;
93

    
94
    icount = qemu_icount;
95
    if (env) {
96
        if (!can_do_io(env)) {
97
            fprintf(stderr, "Bad clock read\n");
98
        }
99
        icount -= (env->icount_decr.u16.low + env->icount_extra);
100
    }
101
    return qemu_icount_bias + (icount << icount_time_shift);
102
}
103

    
104
/* return the host CPU cycle counter and handle stop/restart */
105
int64_t cpu_get_ticks(void)
106
{
107
    if (use_icount) {
108
        return cpu_get_icount();
109
    }
110
    if (!timers_state.cpu_ticks_enabled) {
111
        return timers_state.cpu_ticks_offset;
112
    } else {
113
        int64_t ticks;
114
        ticks = cpu_get_real_ticks();
115
        if (timers_state.cpu_ticks_prev > ticks) {
116
            /* Note: non increasing ticks may happen if the host uses
117
               software suspend */
118
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
119
        }
120
        timers_state.cpu_ticks_prev = ticks;
121
        return ticks + timers_state.cpu_ticks_offset;
122
    }
123
}
124

    
125
/* return the host CPU monotonic timer and handle stop/restart */
126
int64_t cpu_get_clock(void)
127
{
128
    int64_t ti;
129
    if (!timers_state.cpu_ticks_enabled) {
130
        return timers_state.cpu_clock_offset;
131
    } else {
132
        ti = get_clock();
133
        return ti + timers_state.cpu_clock_offset;
134
    }
135
}
136

    
137
/* enable cpu_get_ticks() */
138
void cpu_enable_ticks(void)
139
{
140
    if (!timers_state.cpu_ticks_enabled) {
141
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
142
        timers_state.cpu_clock_offset -= get_clock();
143
        timers_state.cpu_ticks_enabled = 1;
144
    }
145
}
146

    
147
/* disable cpu_get_ticks() : the clock is stopped. You must not call
148
   cpu_get_ticks() after that.  */
149
void cpu_disable_ticks(void)
150
{
151
    if (timers_state.cpu_ticks_enabled) {
152
        timers_state.cpu_ticks_offset = cpu_get_ticks();
153
        timers_state.cpu_clock_offset = cpu_get_clock();
154
        timers_state.cpu_ticks_enabled = 0;
155
    }
156
}
157

    
158
/* Correlation between real and virtual time is always going to be
159
   fairly approximate, so ignore small variation.
160
   When the guest is idle real and virtual time will be aligned in
161
   the IO wait loop.  */
162
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
163

    
164
static void icount_adjust(void)
165
{
166
    int64_t cur_time;
167
    int64_t cur_icount;
168
    int64_t delta;
169
    static int64_t last_delta;
170
    /* If the VM is not running, then do nothing.  */
171
    if (!runstate_is_running()) {
172
        return;
173
    }
174
    cur_time = cpu_get_clock();
175
    cur_icount = qemu_get_clock_ns(vm_clock);
176
    delta = cur_icount - cur_time;
177
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
178
    if (delta > 0
179
        && last_delta + ICOUNT_WOBBLE < delta * 2
180
        && icount_time_shift > 0) {
181
        /* The guest is getting too far ahead.  Slow time down.  */
182
        icount_time_shift--;
183
    }
184
    if (delta < 0
185
        && last_delta - ICOUNT_WOBBLE > delta * 2
186
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
187
        /* The guest is getting too far behind.  Speed time up.  */
188
        icount_time_shift++;
189
    }
190
    last_delta = delta;
191
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
192
}
193

    
194
static void icount_adjust_rt(void *opaque)
195
{
196
    qemu_mod_timer(icount_rt_timer,
197
                   qemu_get_clock_ms(rt_clock) + 1000);
198
    icount_adjust();
199
}
200

    
201
static void icount_adjust_vm(void *opaque)
202
{
203
    qemu_mod_timer(icount_vm_timer,
204
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
205
    icount_adjust();
206
}
207

    
208
static int64_t qemu_icount_round(int64_t count)
209
{
210
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
211
}
212

    
213
static void icount_warp_rt(void *opaque)
214
{
215
    if (vm_clock_warp_start == -1) {
216
        return;
217
    }
218

    
219
    if (runstate_is_running()) {
220
        int64_t clock = qemu_get_clock_ns(rt_clock);
221
        int64_t warp_delta = clock - vm_clock_warp_start;
222
        if (use_icount == 1) {
223
            qemu_icount_bias += warp_delta;
224
        } else {
225
            /*
226
             * In adaptive mode, do not let the vm_clock run too
227
             * far ahead of real time.
228
             */
229
            int64_t cur_time = cpu_get_clock();
230
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
231
            int64_t delta = cur_time - cur_icount;
232
            qemu_icount_bias += MIN(warp_delta, delta);
233
        }
234
        if (qemu_clock_expired(vm_clock)) {
235
            qemu_notify_event();
236
        }
237
    }
238
    vm_clock_warp_start = -1;
239
}
240

    
241
/* Start (or cancel) a "warp" of the vm_clock while all CPUs are idle.
 *
 * Only meaningful for @clock == vm_clock in icount mode; any other
 * call returns immediately.
 */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (!use_icount || clock != vm_clock) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now.
     * This ensures that the deadline computed below is correct, and
     * that the insn counter is synchronized before the CPU starts
     * running, in case the CPU is woken by an event other than the
     * earliest vm_clock timer.
     */
    icount_warp_rt(NULL);

    /* Nothing to warp towards unless every CPU is idle and a vm_clock
       timer is pending.  */
    if (!(all_cpu_threads_idle() && qemu_clock_has_timers(vm_clock))) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline <= 0) {
        /* A timer is already due.  */
        qemu_notify_event();
        return;
    }

    /*
     * Ensure the vm_clock proceeds even when the virtual CPU goes to
     * sleep.  Otherwise, the CPU might be waiting for a future timer
     * interrupt to wake it up, but the interrupt never comes because
     * the vCPU isn't running any insns and thus doesn't advance the
     * vm_clock.
     *
     * An extreme solution would be to never let VCPUs sleep in icount
     * mode if there is a pending vm_clock timer; time could simply
     * jump to the next vm_clock event.  Instead, VCPUs are stopped and
     * vm_clock only advances after some "real" time (related to the
     * time left until the next event) has passed; this rt_clock timer
     * does that.  It keeps the warps from being too visible
     * externally---for example, you will not be sending network
     * packets continuously instead of every 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
}
291

    
292
static const VMStateDescription vmstate_timers = {
293
    .name = "timer",
294
    .version_id = 2,
295
    .minimum_version_id = 1,
296
    .minimum_version_id_old = 1,
297
    .fields      = (VMStateField[]) {
298
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
299
        VMSTATE_INT64(dummy, TimersState),
300
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
301
        VMSTATE_END_OF_LIST()
302
    }
303
};
304

    
305
void configure_icount(const char *option)
306
{
307
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
308
    if (!option) {
309
        return;
310
    }
311

    
312
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
313
    if (strcmp(option, "auto") != 0) {
314
        icount_time_shift = strtol(option, NULL, 0);
315
        use_icount = 1;
316
        return;
317
    }
318

    
319
    use_icount = 2;
320

    
321
    /* 125MIPS seems a reasonable initial guess at the guest speed.
322
       It will be corrected fairly quickly anyway.  */
323
    icount_time_shift = 3;
324

    
325
    /* Have both realtime and virtual time triggers for speed adjustment.
326
       The realtime trigger catches emulated time passing too slowly,
327
       the virtual time trigger catches emulated time passing too fast.
328
       Realtime triggers occur even when idle, so use them less frequently
329
       than VM triggers.  */
330
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
331
    qemu_mod_timer(icount_rt_timer,
332
                   qemu_get_clock_ms(rt_clock) + 1000);
333
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
334
    qemu_mod_timer(icount_vm_timer,
335
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
336
}
337

    
338
/***********************************************************/
339
void hw_error(const char *fmt, ...)
340
{
341
    va_list ap;
342
    CPUState *env;
343

    
344
    va_start(ap, fmt);
345
    fprintf(stderr, "qemu: hardware error: ");
346
    vfprintf(stderr, fmt, ap);
347
    fprintf(stderr, "\n");
348
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
349
        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
350
#ifdef TARGET_I386
351
        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
352
#else
353
        cpu_dump_state(env, stderr, fprintf, 0);
354
#endif
355
    }
356
    va_end(ap);
357
    abort();
358
}
359

    
360
void cpu_synchronize_all_states(void)
361
{
362
    CPUState *cpu;
363

    
364
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
365
        cpu_synchronize_state(cpu);
366
    }
367
}
368

    
369
void cpu_synchronize_all_post_reset(void)
370
{
371
    CPUState *cpu;
372

    
373
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
374
        cpu_synchronize_post_reset(cpu);
375
    }
376
}
377

    
378
void cpu_synchronize_all_post_init(void)
379
{
380
    CPUState *cpu;
381

    
382
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
383
        cpu_synchronize_post_init(cpu);
384
    }
385
}
386

    
387
/* Return 1 when @env is not executing, either because the whole VM is
   not running or because this CPU is marked stopped; 0 otherwise.  */
int cpu_is_stopped(CPUState *env)
{
    if (!runstate_is_running()) {
        return 1;
    }
    return env->stopped ? 1 : 0;
}
391

    
392
/* Stop a running VM and enter run state @state: freeze the clocks,
   pause the VCPUs, notify state listeners, drain and flush block
   devices, and emit the QMP STOP event.  No-op if the VM is not
   running.  */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
404

    
405
/* Return 1 if @env may execute guest code now: no stop is requested,
   the CPU is not stopped and the VM is running.  Returns 0 otherwise.  */
static int cpu_can_run(CPUState *env)
{
    return !(env->stop || env->stopped || !runstate_is_running());
}
415

    
416
/* Tell whether the thread serving @env has nothing to do and may
   sleep on its halt condition.  */
static bool cpu_thread_is_idle(CPUState *env)
{
    /* A pending stop request or queued work must be handled first.  */
    if (env->stop || env->queued_work_first) {
        return false;
    }
    /* A stopped CPU, or any CPU of a stopped VM, is idle.  */
    if (env->stopped || !runstate_is_running()) {
        return true;
    }
    if (!env->halted) {
        return false;
    }
    if (qemu_cpu_has_work(env)) {
        return false;
    }
    /* With KVM and an in-kernel irqchip the CPU is never treated as
       idle here.  */
    return !(kvm_enabled() && kvm_irqchip_in_kernel());
}
430

    
431
bool all_cpu_threads_idle(void)
432
{
433
    CPUState *env;
434

    
435
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
436
        if (!cpu_thread_is_idle(env)) {
437
            return false;
438
        }
439
    }
440
    return true;
441
}
442

    
443
/* Handle an EXCP_DEBUG exit from @env: make it the CPU reported to
   gdb, request a system debug stop, and mark the CPU stopped.  */
static void cpu_handle_guest_debug(CPUState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();

    env->stopped = 1;
}
449

    
450
static void cpu_signal(int sig)
451
{
452
    if (cpu_single_env) {
453
        cpu_exit(cpu_single_env);
454
    }
455
    exit_request = 1;
456
}
457

    
458
#ifdef CONFIG_LINUX
459
static void sigbus_reraise(void)
460
{
461
    sigset_t set;
462
    struct sigaction action;
463

    
464
    memset(&action, 0, sizeof(action));
465
    action.sa_handler = SIG_DFL;
466
    if (!sigaction(SIGBUS, &action, NULL)) {
467
        raise(SIGBUS);
468
        sigemptyset(&set);
469
        sigaddset(&set, SIGBUS);
470
        sigprocmask(SIG_UNBLOCK, &set, NULL);
471
    }
472
    perror("Failed to re-raise SIGBUS!\n");
473
    abort();
474
}
475

    
476
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
477
                           void *ctx)
478
{
479
    if (kvm_on_sigbus(siginfo->ssi_code,
480
                      (void *)(intptr_t)siginfo->ssi_addr)) {
481
        sigbus_reraise();
482
    }
483
}
484

    
485
static void qemu_init_sigbus(void)
486
{
487
    struct sigaction action;
488

    
489
    memset(&action, 0, sizeof(action));
490
    action.sa_flags = SA_SIGINFO;
491
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
492
    sigaction(SIGBUS, &action, NULL);
493

    
494
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
495
}
496

    
497
/* Consume every pending SIG_IPI and SIGBUS without blocking.
 *
 * Each SIGBUS is handed to kvm_on_sigbus_vcpu() for @env and re-raised
 * when that returns non-zero.  Unexpected sigtimedwait()/sigpending()
 * failures terminate the process.
 */
static void qemu_kvm_eat_signals(CPUState *env)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t wanted;
    sigset_t pending;
    int r;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    do {
        /* Zero timeout: poll only.  */
        r = sigtimedwait(&wanted, &info, &no_wait);
        if (r == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (r == SIGBUS
            && kvm_on_sigbus_vcpu(env, info.si_code, info.si_addr)) {
            sigbus_reraise();
        }

        r = sigpending(&pending);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&pending, SIG_IPI) || sigismember(&pending, SIGBUS));
}
533

    
534
#else /* !CONFIG_LINUX */
535

    
536
/* Non-Linux build: no SIGBUS setup is performed.  */
static void qemu_init_sigbus(void)
{
    /* Intentionally empty.  */
}
539

    
540
/* Non-Linux build: there are no signals to consume for @env.  */
static void qemu_kvm_eat_signals(CPUState *env)
{
    /* Intentionally empty.  */
}
543
#endif /* !CONFIG_LINUX */
544

    
545
#ifndef _WIN32
546
/* No-op handler installed for SIG_IPI by qemu_kvm_init_cpu_signals().
   @sig is ignored.  */
static void dummy_signal(int sig)
{
    /* Intentionally empty.  */
}
549

    
550
/* Set up signal handling for a KVM VCPU thread: install a no-op
   SIG_IPI handler and pass KVM the calling thread's signal mask with
   SIG_IPI and SIGBUS removed.  Exits the process if
   kvm_set_signal_mask() fails.  */
static void qemu_kvm_init_cpu_signals(CPUState *env)
{
    struct sigaction ipi_action;
    sigset_t mask;
    int err;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* With a NULL new set this only reads the current mask.  */
    pthread_sigmask(SIG_BLOCK, NULL, &mask);
    sigdelset(&mask, SIG_IPI);
    sigdelset(&mask, SIGBUS);

    err = kvm_set_signal_mask(env, &mask);
    if (err) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-err));
        exit(1);
    }
}
569

    
570
static void qemu_tcg_init_cpu_signals(void)
571
{
572
    sigset_t set;
573
    struct sigaction sigact;
574

    
575
    memset(&sigact, 0, sizeof(sigact));
576
    sigact.sa_handler = cpu_signal;
577
    sigaction(SIG_IPI, &sigact, NULL);
578

    
579
    sigemptyset(&set);
580
    sigaddset(&set, SIG_IPI);
581
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
582
}
583

    
584
#else /* _WIN32 */
585
/* Win32 build: this path is not implemented and aborts if reached.  */
static void qemu_kvm_init_cpu_signals(CPUState *env)
{
    abort();
}
589

    
590
/* Win32 build: no signal setup is performed for the TCG thread.  */
static void qemu_tcg_init_cpu_signals(void)
{
    /* Intentionally empty.  */
}
593
#endif /* _WIN32 */
594

    
595
QemuMutex qemu_global_mutex;
596
static QemuCond qemu_io_proceeded_cond;
597
static bool iothread_requesting_mutex;
598

    
599
static QemuThread io_thread;
600

    
601
static QemuThread *tcg_cpu_thread;
602
static QemuCond *tcg_halt_cond;
603

    
604
/* cpu creation */
605
static QemuCond qemu_cpu_cond;
606
/* system init */
607
static QemuCond qemu_pause_cond;
608
static QemuCond qemu_work_cond;
609

    
610
void qemu_init_cpu_loop(void)
611
{
612
    qemu_init_sigbus();
613
    qemu_cond_init(&qemu_cpu_cond);
614
    qemu_cond_init(&qemu_pause_cond);
615
    qemu_cond_init(&qemu_work_cond);
616
    qemu_cond_init(&qemu_io_proceeded_cond);
617
    qemu_mutex_init(&qemu_global_mutex);
618

    
619
    qemu_thread_get_self(&io_thread);
620
}
621

    
622
/* Run func(data) in the context of @env's thread and wait until it
 * has completed.
 *
 * If the caller already is that thread, @func is invoked directly.
 * Otherwise a work item living on the caller's stack is appended to
 * the CPU's queue, the CPU is kicked, and the caller sleeps on
 * qemu_work_cond (releasing qemu_global_mutex meanwhile) until the
 * item is marked done.
 */
void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(env)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.next = NULL;
    wi.done = false;

    /* Append to the tail of the CPU's work list.  */
    if (env->queued_work_first == NULL) {
        env->queued_work_first = &wi;
    } else {
        env->queued_work_last->next = &wi;
    }
    env->queued_work_last = &wi;

    qemu_cpu_kick(env);
    while (!wi.done) {
        /* Preserve cpu_single_env across the wait.  */
        CPUState *saved_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
650

    
651
/* Execute and complete every work item queued on @env, then wake all
   run_on_cpu() callers.  Returns immediately, without broadcasting,
   when the queue is empty.  */
static void flush_queued_work(CPUState *env)
{
    struct qemu_work_item *item = env->queued_work_first;

    if (item == NULL) {
        return;
    }

    do {
        /* Unlink before running; the head is re-read after each call.  */
        env->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
        item = env->queued_work_first;
    } while (item != NULL);

    env->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
667

    
668
/* Per-CPU housekeeping shared by the TCG and KVM wait paths: honour a
   pending stop request, run queued work, and clear the kick flag.  */
static void qemu_wait_io_event_common(CPUState *env)
{
    if (env->stop) {
        /* Turn the request into the stopped state and signal
           qemu_pause_cond, on which pause_all_vcpus() waits.  */
        env->stop = 0;
        env->stopped = 1;
        qemu_cond_signal(&qemu_pause_cond);
    }

    flush_queued_work(env);
    env->thread_kicked = false;
}
678

    
679
static void qemu_tcg_wait_io_event(void)
680
{
681
    CPUState *env;
682

    
683
    while (all_cpu_threads_idle()) {
684
       /* Start accounting real time to the virtual clock if the CPUs
685
          are idle.  */
686
        qemu_clock_warp(vm_clock);
687
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
688
    }
689

    
690
    while (iothread_requesting_mutex) {
691
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
692
    }
693

    
694
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
695
        qemu_wait_io_event_common(env);
696
    }
697
}
698

    
699
/* Wait path of a KVM VCPU thread: sleep on the CPU's halt condition
   while it is idle, then consume pending signals and do the common
   housekeeping.  */
static void qemu_kvm_wait_io_event(CPUState *env)
{
    for (;;) {
        if (!cpu_thread_is_idle(env)) {
            break;
        }
        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(env);
    qemu_wait_io_event_common(env);
}
708

    
709
static void *qemu_kvm_cpu_thread_fn(void *arg)
710
{
711
    CPUState *env = arg;
712
    int r;
713

    
714
    qemu_mutex_lock(&qemu_global_mutex);
715
    qemu_thread_get_self(env->thread);
716
    env->thread_id = qemu_get_thread_id();
717

    
718
    r = kvm_init_vcpu(env);
719
    if (r < 0) {
720
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
721
        exit(1);
722
    }
723

    
724
    qemu_kvm_init_cpu_signals(env);
725

    
726
    /* signal CPU creation */
727
    env->created = 1;
728
    qemu_cond_signal(&qemu_cpu_cond);
729

    
730
    while (1) {
731
        if (cpu_can_run(env)) {
732
            r = kvm_cpu_exec(env);
733
            if (r == EXCP_DEBUG) {
734
                cpu_handle_guest_debug(env);
735
            }
736
        }
737
        qemu_kvm_wait_io_event(env);
738
    }
739

    
740
    return NULL;
741
}
742

    
743
static void tcg_exec_all(void);
744

    
745
static void *qemu_tcg_cpu_thread_fn(void *arg)
746
{
747
    CPUState *env = arg;
748

    
749
    qemu_tcg_init_cpu_signals();
750
    qemu_thread_get_self(env->thread);
751

    
752
    /* signal CPU creation */
753
    qemu_mutex_lock(&qemu_global_mutex);
754
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
755
        env->thread_id = qemu_get_thread_id();
756
        env->created = 1;
757
    }
758
    qemu_cond_signal(&qemu_cpu_cond);
759

    
760
    /* wait for initial kick-off after machine start */
761
    while (first_cpu->stopped) {
762
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
763
    }
764

    
765
    while (1) {
766
        tcg_exec_all();
767
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
768
            qemu_notify_event();
769
        }
770
        qemu_tcg_wait_io_event();
771
    }
772

    
773
    return NULL;
774
}
775

    
776
/* Interrupt the thread running @env so that it notices pending work.
 *
 * POSIX: send SIG_IPI to the thread; a pthread_kill() failure is fatal.
 * Win32: unless called from that thread itself, suspend it, run
 * cpu_signal() on its behalf, and resume it.
 */
static void qemu_cpu_kick_thread(CPUState *env)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(env->thread->thread, SIG_IPI);
    if (err) {
        /* Terminate the message with a newline; it is the last output
           before exit.  */
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(env)) {
        SuspendThread(env->hThread);
        cpu_signal(0);
        ResumeThread(env->hThread);
    }
#endif
}
794

    
795
void qemu_cpu_kick(void *_env)
796
{
797
    CPUState *env = _env;
798

    
799
    qemu_cond_broadcast(env->halt_cond);
800
    if (kvm_enabled() && !env->thread_kicked) {
801
        qemu_cpu_kick_thread(env);
802
        env->thread_kicked = true;
803
    }
804
}
805

    
806
void qemu_cpu_kick_self(void)
807
{
808
#ifndef _WIN32
809
    assert(cpu_single_env);
810

    
811
    if (!cpu_single_env->thread_kicked) {
812
        qemu_cpu_kick_thread(cpu_single_env);
813
        cpu_single_env->thread_kicked = true;
814
    }
815
#else
816
    abort();
817
#endif
818
}
819

    
820
int qemu_cpu_is_self(void *_env)
821
{
822
    CPUState *env = _env;
823

    
824
    return qemu_thread_is_self(env->thread);
825
}
826

    
827
void qemu_mutex_lock_iothread(void)
828
{
829
    if (kvm_enabled()) {
830
        qemu_mutex_lock(&qemu_global_mutex);
831
    } else {
832
        iothread_requesting_mutex = true;
833
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
834
            qemu_cpu_kick_thread(first_cpu);
835
            qemu_mutex_lock(&qemu_global_mutex);
836
        }
837
        iothread_requesting_mutex = false;
838
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
839
    }
840
}
841

    
842
void qemu_mutex_unlock_iothread(void)
843
{
844
    qemu_mutex_unlock(&qemu_global_mutex);
845
}
846

    
847
static int all_vcpus_paused(void)
848
{
849
    CPUState *penv = first_cpu;
850

    
851
    while (penv) {
852
        if (!penv->stopped) {
853
            return 0;
854
        }
855
        penv = (CPUState *)penv->next_cpu;
856
    }
857

    
858
    return 1;
859
}
860

    
861
void pause_all_vcpus(void)
862
{
863
    CPUState *penv = first_cpu;
864

    
865
    qemu_clock_enable(vm_clock, false);
866
    while (penv) {
867
        penv->stop = 1;
868
        qemu_cpu_kick(penv);
869
        penv = (CPUState *)penv->next_cpu;
870
    }
871

    
872
    while (!all_vcpus_paused()) {
873
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
874
        penv = first_cpu;
875
        while (penv) {
876
            qemu_cpu_kick(penv);
877
            penv = (CPUState *)penv->next_cpu;
878
        }
879
    }
880
}
881

    
882
void resume_all_vcpus(void)
883
{
884
    CPUState *penv = first_cpu;
885

    
886
    qemu_clock_enable(vm_clock, true);
887
    while (penv) {
888
        penv->stop = 0;
889
        penv->stopped = 0;
890
        qemu_cpu_kick(penv);
891
        penv = (CPUState *)penv->next_cpu;
892
    }
893
}
894

    
895
static void qemu_tcg_init_vcpu(void *_env)
896
{
897
    CPUState *env = _env;
898

    
899
    /* share a single thread for all cpus with TCG */
900
    if (!tcg_cpu_thread) {
901
        env->thread = g_malloc0(sizeof(QemuThread));
902
        env->halt_cond = g_malloc0(sizeof(QemuCond));
903
        qemu_cond_init(env->halt_cond);
904
        tcg_halt_cond = env->halt_cond;
905
        qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env,
906
                           QEMU_THREAD_JOINABLE);
907
#ifdef _WIN32
908
        env->hThread = qemu_thread_get_handle(env->thread);
909
#endif
910
        while (env->created == 0) {
911
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
912
        }
913
        tcg_cpu_thread = env->thread;
914
    } else {
915
        env->thread = tcg_cpu_thread;
916
        env->halt_cond = tcg_halt_cond;
917
    }
918
}
919

    
920
/* Create a dedicated thread and halt condition for the KVM VCPU @env
   and wait until the thread has set the created flag.  */
static void qemu_kvm_start_vcpu(CPUState *env)
{
    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);

    env->thread = g_malloc0(sizeof(QemuThread));
    qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (env->created == 0) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
931

    
932
void qemu_init_vcpu(void *_env)
933
{
934
    CPUState *env = _env;
935

    
936
    env->nr_cores = smp_cores;
937
    env->nr_threads = smp_threads;
938
    env->stopped = 1;
939
    if (kvm_enabled()) {
940
        qemu_kvm_start_vcpu(env);
941
    } else {
942
        qemu_tcg_init_vcpu(env);
943
    }
944
}
945

    
946
void cpu_stop_current(void)
947
{
948
    if (cpu_single_env) {
949
        cpu_single_env->stop = 0;
950
        cpu_single_env->stopped = 1;
951
        cpu_exit(cpu_single_env);
952
        qemu_cond_signal(&qemu_pause_cond);
953
    }
954
}
955

    
956
/* Stop the VM and enter run state @state.
 *
 * On the I/O thread the stop is performed immediately.  On any other
 * thread it is only requested, and the current CPU is stopped.
 */
void vm_stop(RunState state)
{
    if (qemu_thread_is_self(&io_thread)) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
969

    
970
/* does a state transition even if the VM is already stopped,
971
   current state is forgotten forever */
972
void vm_stop_force_state(RunState state)
973
{
974
    if (runstate_is_running()) {
975
        vm_stop(state);
976
    } else {
977
        runstate_set(state);
978
    }
979
}
980

    
981
/* Execute guest code on @env through cpu_exec() and return its result.
 *
 * In icount mode the CPU first receives an instruction budget that
 * reaches the next vm_clock deadline; whatever is left unused is
 * folded back into qemu_icount afterwards.
 */
static int tcg_cpu_exec(CPUState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;

    ti = profile_getclock();
#endif

    if (use_icount) {
        int64_t budget;
        int first_chunk;

        /* Take back anything still outstanding from an earlier run.  */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        budget = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += budget;

        /* The 16-bit decrementer takes at most 0xffff instructions;
           the remainder goes to icount_extra.  */
        first_chunk = (budget > 0xffff) ? 0xffff : budget;
        budget -= first_chunk;
        env->icount_decr.u16.low = first_chunk;
        env->icount_extra = budget;
    }

    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif

    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}
1018

    
1019
static void tcg_exec_all(void)
1020
{
1021
    int r;
1022

    
1023
    /* Account partial waits to the vm_clock.  */
1024
    qemu_clock_warp(vm_clock);
1025

    
1026
    if (next_cpu == NULL) {
1027
        next_cpu = first_cpu;
1028
    }
1029
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1030
        CPUState *env = next_cpu;
1031

    
1032
        qemu_clock_enable(vm_clock,
1033
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1034

    
1035
        if (cpu_can_run(env)) {
1036
            r = tcg_cpu_exec(env);
1037
            if (r == EXCP_DEBUG) {
1038
                cpu_handle_guest_debug(env);
1039
                break;
1040
            }
1041
        } else if (env->stop || env->stopped) {
1042
            break;
1043
        }
1044
    }
1045
    exit_request = 0;
1046
}
1047

    
1048
void set_numa_modes(void)
1049
{
1050
    CPUState *env;
1051
    int i;
1052

    
1053
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1054
        for (i = 0; i < nb_numa_nodes; i++) {
1055
            if (node_cpumask[i] & (1 << env->cpu_index)) {
1056
                env->numa_node = i;
1057
            }
1058
        }
1059
    }
1060
}
1061

    
1062
void set_cpu_log(const char *optarg)
1063
{
1064
    int mask;
1065
    const CPULogItem *item;
1066

    
1067
    mask = cpu_str_to_log_mask(optarg);
1068
    if (!mask) {
1069
        printf("Log items (comma separated):\n");
1070
        for (item = cpu_log_items; item->mask != 0; item++) {
1071
            printf("%-10s %s\n", item->name, item->help);
1072
        }
1073
        exit(1);
1074
    }
1075
    cpu_set_log(mask);
1076
}
1077

    
1078
/*
 * Forward the log file name given in optarg, unchanged, to
 * cpu_set_log_filename().
 */
void set_cpu_log_filename(const char *optarg)
{
    cpu_set_log_filename(optarg);
}
1082

    
1083
/*
 * Print the list of CPU models supported by the target.
 *
 * Delegates to cpu_list_id(f, cpu_fprintf, optarg) when the target
 * defines that macro, otherwise to the older cpu_list(f, cpu_fprintf)
 * when that one is defined.  When neither macro exists the function
 * does nothing.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list_id
    cpu_list_id(f, cpu_fprintf, optarg);
#else
#ifdef cpu_list
    cpu_list(f, cpu_fprintf); /* deprecated */
#endif
#endif
}
1092

    
1093
/*
 * Build a singly linked CpuInfoList with one entry per CPU, in the
 * order of the first_cpu list.
 *
 * Each entry records the CPU index, whether it is first_cpu ("current"),
 * its halted flag and thread id, plus a target-specific program counter
 * on i386, PPC, SPARC and MIPS.  Returns the list head, or NULL when
 * there are no CPUs.  errp is not written by this function.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;   /* slot that receives the next entry */
    CPUState *env = first_cpu;

    while (env != NULL) {
        CpuInfoList *node;

        cpu_synchronize_state(env);

        /* Zero-filled allocations: node->next and all has_* flags start
           out cleared.  */
        node = g_malloc0(sizeof(*node));
        node->value = g_malloc0(sizeof(*node->value));

        node->value->CPU = env->cpu_index;
        node->value->current = (env == first_cpu);
        node->value->halted = env->halted;
        node->value->thread_id = env->thread_id;
#if defined(TARGET_I386)
        node->value->has_pc = true;
        node->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        node->value->has_nip = true;
        node->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        node->value->has_pc = true;
        node->value->pc = env->pc;
        node->value->has_npc = true;
        node->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        node->value->has_PC = true;
        node->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        *tail = node;
        tail = &node->next;

        env = env->next_cpu;
    }

    return head;
}
1136

    
1137
/*
 * Save size bytes of guest virtual memory, starting at addr as seen by
 * one CPU, into the file named filename (created or truncated).
 *
 * has_cpu/cpu_index select the CPU; CPU 0 is used when has_cpu is false.
 *
 * Errors reported through errp:
 *  - QERR_INVALID_PARAMETER_VALUE when no CPU has the requested index,
 *    when size is negative, or when the guest memory cannot be read;
 *  - QERR_OPEN_FILE_FAILED when the file cannot be opened;
 *  - QERR_IO_ERROR when writing or closing the file fails.
 * On a read or write error the file keeps whatever was written so far.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *env;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    for (env = first_cpu; env; env = env->next_cpu) {
        if (cpu_index == env->cpu_index) {
            break;
        }
    }

    if (env == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    /*
     * A negative size would pass the "l > size" test below and be
     * truncated into a huge unsigned chunk length, making fwrite() read
     * far beyond buf.  Reject it before touching the file.
     */
    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        /*
         * Do not write buf when the read failed: it would hold
         * uninitialised stack bytes.
         * NOTE(review): "val" is taken to be the QMP argument name for
         * addr -- confirm against the command schema.
         */
        if (cpu_memory_rw_debug(env, addr, buf, l, 0) != 0) {
            error_set(errp, QERR_INVALID_PARAMETER_VALUE, "val",
                      "a readable guest virtual address");
            goto out_close;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto out_close;
        }
        addr += l;
        size -= l;
    }

    /* fclose() flushes buffered data; a failure here means data loss.  */
    if (fclose(f) != 0) {
        error_set(errp, QERR_IO_ERROR);
    }
    return;

out_close:
    /* An error is already recorded in errp; just release the stream.  */
    fclose(f);
}
1183

    
1184
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1185
                  Error **errp)
1186
{
1187
    FILE *f;
1188
    uint32_t l;
1189
    uint8_t buf[1024];
1190

    
1191
    f = fopen(filename, "wb");
1192
    if (!f) {
1193
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1194
        return;
1195
    }
1196

    
1197
    while (size != 0) {
1198
        l = sizeof(buf);
1199
        if (l > size)
1200
            l = size;
1201
        cpu_physical_memory_rw(addr, buf, l, 0);
1202
        if (fwrite(buf, 1, l, f) != l) {
1203
            error_set(errp, QERR_IO_ERROR);
1204
            goto exit;
1205
        }
1206
        addr += l;
1207
        size -= l;
1208
    }
1209

    
1210
exit:
1211
    fclose(f);
1212
}
1213

    
1214
/*
 * Raise CPU_INTERRUPT_NMI on every CPU in the first_cpu list.
 *
 * Only implemented when built for TARGET_I386; on every other target
 * the call sets QERR_UNSUPPORTED in errp and does nothing else.
 */
void qmp_inject_nmi(Error **errp)
{
#ifdef TARGET_I386
    CPUState *cpu = first_cpu;

    while (cpu != NULL) {
        cpu_interrupt(cpu, CPU_INTERRUPT_NMI);
        cpu = cpu->next_cpu;
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}