Statistics
| Branch: | Revision:

root / cpus.c @ 491d6e80

History | View | Annotate | Download (33.8 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor/monitor.h"
29
#include "sysemu/sysemu.h"
30
#include "exec/gdbstub.h"
31
#include "sysemu/dma.h"
32
#include "sysemu/kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu/thread.h"
36
#include "sysemu/cpus.h"
37
#include "sysemu/qtest.h"
38
#include "qemu/main-loop.h"
39
#include "qemu/bitmap.h"
40

    
41
#ifndef _WIN32
42
#include "qemu/compatfd.h"
43
#endif
44

    
45
#ifdef CONFIG_LINUX
46

    
47
#include <sys/prctl.h>
48

    
49
#ifndef PR_MCE_KILL
50
#define PR_MCE_KILL 33
51
#endif
52

    
53
#ifndef PR_MCE_KILL_SET
54
#define PR_MCE_KILL_SET 1
55
#endif
56

    
57
#ifndef PR_MCE_KILL_EARLY
58
#define PR_MCE_KILL_EARLY 1
59
#endif
60

    
61
#endif /* CONFIG_LINUX */
62

    
63
static CPUArchState *next_cpu;
64

    
65
/*
 * Tell whether the thread driving @cpu currently has nothing to do.
 *
 * A pending stop request or queued work item always makes the thread busy.
 * Otherwise a stopped CPU, or a VM that is not running, is idle.  A running
 * CPU is idle only if it is halted, has no work, and KVM asynchronous
 * interrupts are not enabled.
 */
static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first != NULL) {
        return false;
    }
    if (cpu->stopped || !runstate_is_running()) {
        return true;
    }
    return cpu->halted && !qemu_cpu_has_work(cpu) &&
           !kvm_async_interrupts_enabled();
}
79

    
80
static bool all_cpu_threads_idle(void)
81
{
82
    CPUArchState *env;
83

    
84
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
85
        if (!cpu_thread_is_idle(ENV_GET_CPU(env))) {
86
            return false;
87
        }
88
    }
89
    return true;
90
}
91

    
92
/***********************************************************/
93
/* guest cycle counter */
94

    
95
/* Conversion factor from emulated instructions to virtual clock ticks.  */
96
static int icount_time_shift;
97
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
98
#define MAX_ICOUNT_SHIFT 10
99
/* Compensate for varying guest execution speed.  */
100
static int64_t qemu_icount_bias;
101
static QEMUTimer *icount_rt_timer;
102
static QEMUTimer *icount_vm_timer;
103
static QEMUTimer *icount_warp_timer;
104
static int64_t vm_clock_warp_start;
105
static int64_t qemu_icount;
106

    
107
/*
 * Book-keeping for the host tick counter and host clock as seen by the
 * guest.  The fields are maintained by cpu_get_ticks(), cpu_get_clock(),
 * cpu_enable_ticks() and cpu_disable_ticks().
 */
typedef struct TimersState {
    /* Last raw tick value observed by cpu_get_ticks(). */
    int64_t cpu_ticks_prev;
    /* Added to the raw tick count; returned as-is while ticks are disabled. */
    int64_t cpu_ticks_offset;
    /* Added to get_clock(); returned as-is while ticks are disabled. */
    int64_t cpu_clock_offset;
    /* Non-zero while the tick counter and clock are running. */
    int32_t cpu_ticks_enabled;
    /* Only referenced from the vmstate field list in this file. */
    int64_t dummy;
} TimersState;

TimersState timers_state;
116

    
117
/* Return the virtual CPU time, based on the instruction counter.  */
118
int64_t cpu_get_icount(void)
119
{
120
    int64_t icount;
121
    CPUArchState *env = cpu_single_env;
122

    
123
    icount = qemu_icount;
124
    if (env) {
125
        if (!can_do_io(env)) {
126
            fprintf(stderr, "Bad clock read\n");
127
        }
128
        icount -= (env->icount_decr.u16.low + env->icount_extra);
129
    }
130
    return qemu_icount_bias + (icount << icount_time_shift);
131
}
132

    
133
/* return the host CPU cycle counter and handle stop/restart */
134
int64_t cpu_get_ticks(void)
135
{
136
    if (use_icount) {
137
        return cpu_get_icount();
138
    }
139
    if (!timers_state.cpu_ticks_enabled) {
140
        return timers_state.cpu_ticks_offset;
141
    } else {
142
        int64_t ticks;
143
        ticks = cpu_get_real_ticks();
144
        if (timers_state.cpu_ticks_prev > ticks) {
145
            /* Note: non increasing ticks may happen if the host uses
146
               software suspend */
147
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
148
        }
149
        timers_state.cpu_ticks_prev = ticks;
150
        return ticks + timers_state.cpu_ticks_offset;
151
    }
152
}
153

    
154
/* return the host CPU monotonic timer and handle stop/restart */
155
int64_t cpu_get_clock(void)
156
{
157
    int64_t ti;
158
    if (!timers_state.cpu_ticks_enabled) {
159
        return timers_state.cpu_clock_offset;
160
    } else {
161
        ti = get_clock();
162
        return ti + timers_state.cpu_clock_offset;
163
    }
164
}
165

    
166
/* enable cpu_get_ticks() */
167
void cpu_enable_ticks(void)
168
{
169
    if (!timers_state.cpu_ticks_enabled) {
170
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
171
        timers_state.cpu_clock_offset -= get_clock();
172
        timers_state.cpu_ticks_enabled = 1;
173
    }
174
}
175

    
176
/* disable cpu_get_ticks() : the clock is stopped. You must not call
177
   cpu_get_ticks() after that.  */
178
void cpu_disable_ticks(void)
179
{
180
    if (timers_state.cpu_ticks_enabled) {
181
        timers_state.cpu_ticks_offset = cpu_get_ticks();
182
        timers_state.cpu_clock_offset = cpu_get_clock();
183
        timers_state.cpu_ticks_enabled = 0;
184
    }
185
}
186

    
187
/* Correlation between real and virtual time is always going to be
188
   fairly approximate, so ignore small variation.
189
   When the guest is idle real and virtual time will be aligned in
190
   the IO wait loop.  */
191
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
192

    
193
static void icount_adjust(void)
194
{
195
    int64_t cur_time;
196
    int64_t cur_icount;
197
    int64_t delta;
198
    static int64_t last_delta;
199
    /* If the VM is not running, then do nothing.  */
200
    if (!runstate_is_running()) {
201
        return;
202
    }
203
    cur_time = cpu_get_clock();
204
    cur_icount = qemu_get_clock_ns(vm_clock);
205
    delta = cur_icount - cur_time;
206
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
207
    if (delta > 0
208
        && last_delta + ICOUNT_WOBBLE < delta * 2
209
        && icount_time_shift > 0) {
210
        /* The guest is getting too far ahead.  Slow time down.  */
211
        icount_time_shift--;
212
    }
213
    if (delta < 0
214
        && last_delta - ICOUNT_WOBBLE > delta * 2
215
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
216
        /* The guest is getting too far behind.  Speed time up.  */
217
        icount_time_shift++;
218
    }
219
    last_delta = delta;
220
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
221
}
222

    
223
static void icount_adjust_rt(void *opaque)
224
{
225
    qemu_mod_timer(icount_rt_timer,
226
                   qemu_get_clock_ms(rt_clock) + 1000);
227
    icount_adjust();
228
}
229

    
230
static void icount_adjust_vm(void *opaque)
231
{
232
    qemu_mod_timer(icount_vm_timer,
233
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
234
    icount_adjust();
235
}
236

    
237
static int64_t qemu_icount_round(int64_t count)
238
{
239
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
240
}
241

    
242
static void icount_warp_rt(void *opaque)
243
{
244
    if (vm_clock_warp_start == -1) {
245
        return;
246
    }
247

    
248
    if (runstate_is_running()) {
249
        int64_t clock = qemu_get_clock_ns(rt_clock);
250
        int64_t warp_delta = clock - vm_clock_warp_start;
251
        if (use_icount == 1) {
252
            qemu_icount_bias += warp_delta;
253
        } else {
254
            /*
255
             * In adaptive mode, do not let the vm_clock run too
256
             * far ahead of real time.
257
             */
258
            int64_t cur_time = cpu_get_clock();
259
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
260
            int64_t delta = cur_time - cur_icount;
261
            qemu_icount_bias += MIN(warp_delta, delta);
262
        }
263
        if (qemu_clock_expired(vm_clock)) {
264
            qemu_notify_event();
265
        }
266
    }
267
    vm_clock_warp_start = -1;
268
}
269

    
270
void qtest_clock_warp(int64_t dest)
271
{
272
    int64_t clock = qemu_get_clock_ns(vm_clock);
273
    assert(qtest_enabled());
274
    while (clock < dest) {
275
        int64_t deadline = qemu_clock_deadline(vm_clock);
276
        int64_t warp = MIN(dest - clock, deadline);
277
        qemu_icount_bias += warp;
278
        qemu_run_timers(vm_clock);
279
        clock = qemu_get_clock_ns(vm_clock);
280
    }
281
    qemu_notify_event();
282
}
283

    
284
/*
 * Start (or cancel) accounting of real time to the virtual clock while all
 * CPUs are idle.  Only meaningful for vm_clock with icount enabled; any
 * other call returns immediately.
 */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (!use_icount || clock != vm_clock) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
     * ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);

    /* Nothing to warp towards: some CPU is busy or no vm_clock timer exists. */
    if (!(all_cpu_threads_idle() && qemu_clock_has_timers(vm_clock))) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline <= 0) {
        qemu_notify_event();
        return;
    }

    /*
     * Ensure the vm_clock proceeds even when the virtual CPU goes to
     * sleep.  Otherwise, the CPU might be waiting for a future timer
     * interrupt to wake it up, but the interrupt never comes because
     * the vCPU isn't running any insns and thus doesn't advance the
     * vm_clock.
     *
     * An extreme solution for this problem would be to never let VCPUs
     * sleep in icount mode if there is a pending vm_clock timer; rather
     * time could just advance to the next vm_clock event.  Instead, we
     * do stop VCPUs and only advance vm_clock after some "real" time,
     * (related to the time left until the next event) has passed.  This
     * rt_clock timer will do this.  This avoids that the warps are too
     * visible externally---for example, you will not be sending network
     * packets continuously instead of every 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
}
339

    
340
static const VMStateDescription vmstate_timers = {
341
    .name = "timer",
342
    .version_id = 2,
343
    .minimum_version_id = 1,
344
    .minimum_version_id_old = 1,
345
    .fields      = (VMStateField[]) {
346
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
347
        VMSTATE_INT64(dummy, TimersState),
348
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
349
        VMSTATE_END_OF_LIST()
350
    }
351
};
352

    
353
void configure_icount(const char *option)
354
{
355
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
356
    if (!option) {
357
        return;
358
    }
359

    
360
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
361
    if (strcmp(option, "auto") != 0) {
362
        icount_time_shift = strtol(option, NULL, 0);
363
        use_icount = 1;
364
        return;
365
    }
366

    
367
    use_icount = 2;
368

    
369
    /* 125MIPS seems a reasonable initial guess at the guest speed.
370
       It will be corrected fairly quickly anyway.  */
371
    icount_time_shift = 3;
372

    
373
    /* Have both realtime and virtual time triggers for speed adjustment.
374
       The realtime trigger catches emulated time passing too slowly,
375
       the virtual time trigger catches emulated time passing too fast.
376
       Realtime triggers occur even when idle, so use them less frequently
377
       than VM triggers.  */
378
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
379
    qemu_mod_timer(icount_rt_timer,
380
                   qemu_get_clock_ms(rt_clock) + 1000);
381
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
382
    qemu_mod_timer(icount_vm_timer,
383
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
384
}
385

    
386
/***********************************************************/
387
void hw_error(const char *fmt, ...)
388
{
389
    va_list ap;
390
    CPUArchState *env;
391
    CPUState *cpu;
392

    
393
    va_start(ap, fmt);
394
    fprintf(stderr, "qemu: hardware error: ");
395
    vfprintf(stderr, fmt, ap);
396
    fprintf(stderr, "\n");
397
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
398
        cpu = ENV_GET_CPU(env);
399
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
400
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
401
    }
402
    va_end(ap);
403
    abort();
404
}
405

    
406
void cpu_synchronize_all_states(void)
407
{
408
    CPUArchState *env;
409

    
410
    for (env = first_cpu; env; env = env->next_cpu) {
411
        cpu_synchronize_state(ENV_GET_CPU(env));
412
    }
413
}
414

    
415
void cpu_synchronize_all_post_reset(void)
416
{
417
    CPUArchState *cpu;
418

    
419
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
420
        cpu_synchronize_post_reset(ENV_GET_CPU(cpu));
421
    }
422
}
423

    
424
void cpu_synchronize_all_post_init(void)
425
{
426
    CPUArchState *cpu;
427

    
428
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
429
        cpu_synchronize_post_init(ENV_GET_CPU(cpu));
430
    }
431
}
432

    
433
/*
 * A CPU counts as stopped when the VM as a whole is not running or when
 * the CPU's own stopped flag is set.
 */
bool cpu_is_stopped(CPUState *cpu)
{
    if (!runstate_is_running()) {
        return true;
    }
    return cpu->stopped;
}
437

    
438
/*
 * Stop a running VM and enter run state @state: freeze the clocks, pause
 * all vCPUs, publish the new state, drain and flush block devices, and
 * emit the STOP monitor event.  Does nothing if the VM is not running.
 */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
450

    
451
/*
 * A CPU may execute guest code only if no stop is requested, it is not
 * stopped, and the VM is running.
 */
static bool cpu_can_run(CPUState *cpu)
{
    return !cpu->stop && !cpu->stopped && runstate_is_running();
}
461

    
462
/*
 * Handle a debug exit from guest execution: record @env as the CPU the
 * gdbstub should report, request a system debug stop, and mark the CPU
 * stopped.
 */
static void cpu_handle_guest_debug(CPUArchState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();
    ENV_GET_CPU(env)->stopped = true;
}
470

    
471
static void cpu_signal(int sig)
472
{
473
    if (cpu_single_env) {
474
        cpu_exit(ENV_GET_CPU(cpu_single_env));
475
    }
476
    exit_request = 1;
477
}
478

    
479
#ifdef CONFIG_LINUX
480
static void sigbus_reraise(void)
481
{
482
    sigset_t set;
483
    struct sigaction action;
484

    
485
    memset(&action, 0, sizeof(action));
486
    action.sa_handler = SIG_DFL;
487
    if (!sigaction(SIGBUS, &action, NULL)) {
488
        raise(SIGBUS);
489
        sigemptyset(&set);
490
        sigaddset(&set, SIGBUS);
491
        sigprocmask(SIG_UNBLOCK, &set, NULL);
492
    }
493
    perror("Failed to re-raise SIGBUS!\n");
494
    abort();
495
}
496

    
497
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
498
                           void *ctx)
499
{
500
    if (kvm_on_sigbus(siginfo->ssi_code,
501
                      (void *)(intptr_t)siginfo->ssi_addr)) {
502
        sigbus_reraise();
503
    }
504
}
505

    
506
static void qemu_init_sigbus(void)
507
{
508
    struct sigaction action;
509

    
510
    memset(&action, 0, sizeof(action));
511
    action.sa_flags = SA_SIGINFO;
512
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
513
    sigaction(SIGBUS, &action, NULL);
514

    
515
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
516
}
517

    
518
/*
 * Consume pending SIG_IPI and SIGBUS signals without blocking (zero
 * timeout).  A SIGBUS is forwarded to kvm_on_sigbus_vcpu() for @cpu and
 * re-raised if that returns non-zero.  The loop repeats while either
 * signal is still pending.  Unexpected errors are fatal.
 */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t wanted;
    sigset_t pending;
    int r;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    do {
        r = sigtimedwait(&wanted, &info, &no_wait);
        if (r == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (r == SIGBUS) {
            if (kvm_on_sigbus_vcpu(cpu, info.si_code, info.si_addr)) {
                sigbus_reraise();
            }
        }

        r = sigpending(&pending);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&pending, SIG_IPI) || sigismember(&pending, SIGBUS));
}
554

    
555
#else /* !CONFIG_LINUX */
556

    
557
static void qemu_init_sigbus(void)
558
{
559
}
560

    
561
static void qemu_kvm_eat_signals(CPUState *cpu)
562
{
563
}
564
#endif /* !CONFIG_LINUX */
565

    
566
#ifndef _WIN32
567
/* Signal handler that intentionally does nothing; @sig is ignored. */
static void dummy_signal(int sig)
{
}
570

    
571
/*
 * Set up signal handling for a KVM vCPU thread: install a no-op SIG_IPI
 * handler, then give KVM the thread's current signal mask with SIG_IPI and
 * SIGBUS removed.  Failure to set the mask is fatal.
 */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    struct sigaction ipi_action;
    sigset_t mask;
    int ret;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* A NULL new set only queries the thread's current mask. */
    pthread_sigmask(SIG_BLOCK, NULL, &mask);
    sigdelset(&mask, SIG_IPI);
    sigdelset(&mask, SIGBUS);

    ret = kvm_set_signal_mask(cpu, &mask);
    if (ret) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-ret));
        exit(1);
    }
}
591

    
592
static void qemu_tcg_init_cpu_signals(void)
593
{
594
    sigset_t set;
595
    struct sigaction sigact;
596

    
597
    memset(&sigact, 0, sizeof(sigact));
598
    sigact.sa_handler = cpu_signal;
599
    sigaction(SIG_IPI, &sigact, NULL);
600

    
601
    sigemptyset(&set);
602
    sigaddset(&set, SIG_IPI);
603
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
604
}
605

    
606
#else /* _WIN32 */
607
/* Win32 build: this path is not expected to be reached and aborts. */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    abort();
}

/* Win32 build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
}
615
#endif /* _WIN32 */
616

    
617
static QemuMutex qemu_global_mutex;
618
static QemuCond qemu_io_proceeded_cond;
619
static bool iothread_requesting_mutex;
620

    
621
static QemuThread io_thread;
622

    
623
static QemuThread *tcg_cpu_thread;
624
static QemuCond *tcg_halt_cond;
625

    
626
/* cpu creation */
627
static QemuCond qemu_cpu_cond;
628
/* system init */
629
static QemuCond qemu_pause_cond;
630
static QemuCond qemu_work_cond;
631

    
632
void qemu_init_cpu_loop(void)
633
{
634
    qemu_init_sigbus();
635
    qemu_cond_init(&qemu_cpu_cond);
636
    qemu_cond_init(&qemu_pause_cond);
637
    qemu_cond_init(&qemu_work_cond);
638
    qemu_cond_init(&qemu_io_proceeded_cond);
639
    qemu_mutex_init(&qemu_global_mutex);
640

    
641
    qemu_thread_get_self(&io_thread);
642
}
643

    
644
/*
 * Run @func(@data) in the context of @cpu's thread and wait for it to
 * finish.
 *
 * If the caller already is that thread, @func is invoked directly.
 * Otherwise a stack-allocated work item is appended to the CPU's queue,
 * the CPU is kicked, and the caller sleeps on qemu_work_cond (releasing
 * qemu_global_mutex while waiting) until the item is marked done.
 */
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.next = NULL;
    wi.done = false;

    /* Append to the tail of the CPU's work queue. */
    if (cpu->queued_work_first != NULL) {
        cpu->queued_work_last->next = &wi;
    } else {
        cpu->queued_work_first = &wi;
    }
    cpu->queued_work_last = &wi;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        /* Preserve cpu_single_env across the wait. */
        CPUArchState *saved_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
672

    
673
/*
 * Run every work item queued on @cpu, mark each one done, and wake all
 * waiters on qemu_work_cond.  Returns immediately if the queue is empty.
 */
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *item = cpu->queued_work_first;

    if (item == NULL) {
        return;
    }

    while (item != NULL) {
        /* Unlink before running the callback. */
        cpu->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
        /* Re-read the head after the callback has run. */
        item = cpu->queued_work_first;
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
689

    
690
/*
 * Housekeeping shared by all vCPU thread flavours after waking up: turn a
 * pending stop request into the stopped state (signalling
 * qemu_pause_cond), run queued work, and clear the kicked flag.
 */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    bool stop_requested = cpu->stop;

    if (stop_requested) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }

    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
700

    
701
static void qemu_tcg_wait_io_event(void)
702
{
703
    CPUArchState *env;
704

    
705
    while (all_cpu_threads_idle()) {
706
       /* Start accounting real time to the virtual clock if the CPUs
707
          are idle.  */
708
        qemu_clock_warp(vm_clock);
709
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
710
    }
711

    
712
    while (iothread_requesting_mutex) {
713
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
714
    }
715

    
716
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
717
        qemu_wait_io_event_common(ENV_GET_CPU(env));
718
    }
719
}
720

    
721
/*
 * KVM vCPU thread: sleep on the CPU's halt condition while it is idle,
 * then drain pending signals and run the common wake-up housekeeping.
 */
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    for (;;) {
        if (!cpu_thread_is_idle(cpu)) {
            break;
        }
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
730

    
731
static void *qemu_kvm_cpu_thread_fn(void *arg)
732
{
733
    CPUArchState *env = arg;
734
    CPUState *cpu = ENV_GET_CPU(env);
735
    int r;
736

    
737
    qemu_mutex_lock(&qemu_global_mutex);
738
    qemu_thread_get_self(cpu->thread);
739
    cpu->thread_id = qemu_get_thread_id();
740
    cpu_single_env = env;
741

    
742
    r = kvm_init_vcpu(cpu);
743
    if (r < 0) {
744
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
745
        exit(1);
746
    }
747

    
748
    qemu_kvm_init_cpu_signals(env);
749

    
750
    /* signal CPU creation */
751
    cpu->created = true;
752
    qemu_cond_signal(&qemu_cpu_cond);
753

    
754
    while (1) {
755
        if (cpu_can_run(cpu)) {
756
            r = kvm_cpu_exec(env);
757
            if (r == EXCP_DEBUG) {
758
                cpu_handle_guest_debug(env);
759
            }
760
        }
761
        qemu_kvm_wait_io_event(cpu);
762
    }
763

    
764
    return NULL;
765
}
766

    
767
static void *qemu_dummy_cpu_thread_fn(void *arg)
768
{
769
#ifdef _WIN32
770
    fprintf(stderr, "qtest is not supported under Windows\n");
771
    exit(1);
772
#else
773
    CPUArchState *env = arg;
774
    CPUState *cpu = ENV_GET_CPU(env);
775
    sigset_t waitset;
776
    int r;
777

    
778
    qemu_mutex_lock_iothread();
779
    qemu_thread_get_self(cpu->thread);
780
    cpu->thread_id = qemu_get_thread_id();
781

    
782
    sigemptyset(&waitset);
783
    sigaddset(&waitset, SIG_IPI);
784

    
785
    /* signal CPU creation */
786
    cpu->created = true;
787
    qemu_cond_signal(&qemu_cpu_cond);
788

    
789
    cpu_single_env = env;
790
    while (1) {
791
        cpu_single_env = NULL;
792
        qemu_mutex_unlock_iothread();
793
        do {
794
            int sig;
795
            r = sigwait(&waitset, &sig);
796
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
797
        if (r == -1) {
798
            perror("sigwait");
799
            exit(1);
800
        }
801
        qemu_mutex_lock_iothread();
802
        cpu_single_env = env;
803
        qemu_wait_io_event_common(cpu);
804
    }
805

    
806
    return NULL;
807
#endif
808
}
809

    
810
static void tcg_exec_all(void);
811

    
812
/*
 * qemu_for_each_cpu() callback used by the TCG thread: mark @cpu as
 * created and record the calling thread's id in it.  @data is unused.
 */
static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
{
    int tid = qemu_get_thread_id();

    cpu->thread_id = tid;
    cpu->created = true;
}
817

    
818
static void *qemu_tcg_cpu_thread_fn(void *arg)
819
{
820
    CPUState *cpu = arg;
821
    CPUArchState *env;
822

    
823
    qemu_tcg_init_cpu_signals();
824
    qemu_thread_get_self(cpu->thread);
825

    
826
    qemu_mutex_lock(&qemu_global_mutex);
827
    qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
828
    qemu_cond_signal(&qemu_cpu_cond);
829

    
830
    /* wait for initial kick-off after machine start */
831
    while (ENV_GET_CPU(first_cpu)->stopped) {
832
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
833

    
834
        /* process any pending work */
835
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
836
            qemu_wait_io_event_common(ENV_GET_CPU(env));
837
        }
838
    }
839

    
840
    while (1) {
841
        tcg_exec_all();
842
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
843
            qemu_notify_event();
844
        }
845
        qemu_tcg_wait_io_event();
846
    }
847

    
848
    return NULL;
849
}
850

    
851
/*
 * Interrupt the thread that runs @cpu.
 *
 * POSIX: send SIG_IPI to the thread; failure is fatal.
 * Win32: unless called from that thread itself, suspend it, invoke
 * cpu_signal() on its behalf, and resume it; failure is fatal.
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        /* Terminate the line, like the other diagnostics in this function. */
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         *
         * NOTE(review): this loop repeats while GetThreadContext() returns
         * non-zero; confirm against the Win32 documentation that this is
         * the intended polarity.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
889

    
890
/*
 * Wake up @cpu: broadcast its halt condition and, when TCG is not in use
 * and the thread has not been kicked already, interrupt the thread too.
 */
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);

    if (tcg_enabled() || cpu->thread_kicked) {
        return;
    }
    qemu_cpu_kick_thread(cpu);
    cpu->thread_kicked = true;
}
898

    
899
void qemu_cpu_kick_self(void)
900
{
901
#ifndef _WIN32
902
    assert(cpu_single_env);
903
    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
904

    
905
    if (!cpu_single_cpu->thread_kicked) {
906
        qemu_cpu_kick_thread(cpu_single_cpu);
907
        cpu_single_cpu->thread_kicked = true;
908
    }
909
#else
910
    abort();
911
#endif
912
}
913

    
914
/* Return true if the calling thread is the thread recorded for @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    QemuThread *owner = cpu->thread;

    return qemu_thread_is_self(owner);
}
918

    
919
static bool qemu_in_vcpu_thread(void)
920
{
921
    return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
922
}
923

    
924
void qemu_mutex_lock_iothread(void)
925
{
926
    if (!tcg_enabled()) {
927
        qemu_mutex_lock(&qemu_global_mutex);
928
    } else {
929
        iothread_requesting_mutex = true;
930
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
931
            qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
932
            qemu_mutex_lock(&qemu_global_mutex);
933
        }
934
        iothread_requesting_mutex = false;
935
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
936
    }
937
}
938

    
939
void qemu_mutex_unlock_iothread(void)
940
{
941
    qemu_mutex_unlock(&qemu_global_mutex);
942
}
943

    
944
static int all_vcpus_paused(void)
945
{
946
    CPUArchState *penv = first_cpu;
947

    
948
    while (penv) {
949
        CPUState *pcpu = ENV_GET_CPU(penv);
950
        if (!pcpu->stopped) {
951
            return 0;
952
        }
953
        penv = penv->next_cpu;
954
    }
955

    
956
    return 1;
957
}
958

    
959
void pause_all_vcpus(void)
960
{
961
    CPUArchState *penv = first_cpu;
962

    
963
    qemu_clock_enable(vm_clock, false);
964
    while (penv) {
965
        CPUState *pcpu = ENV_GET_CPU(penv);
966
        pcpu->stop = true;
967
        qemu_cpu_kick(pcpu);
968
        penv = penv->next_cpu;
969
    }
970

    
971
    if (qemu_in_vcpu_thread()) {
972
        cpu_stop_current();
973
        if (!kvm_enabled()) {
974
            penv = first_cpu;
975
            while (penv) {
976
                CPUState *pcpu = ENV_GET_CPU(penv);
977
                pcpu->stop = false;
978
                pcpu->stopped = true;
979
                penv = penv->next_cpu;
980
            }
981
            return;
982
        }
983
    }
984

    
985
    while (!all_vcpus_paused()) {
986
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
987
        penv = first_cpu;
988
        while (penv) {
989
            qemu_cpu_kick(ENV_GET_CPU(penv));
990
            penv = penv->next_cpu;
991
        }
992
    }
993
}
994

    
995
/* Clear @cpu's stop request and stopped state, then kick it. */
void cpu_resume(CPUState *cpu)
{
    cpu->stopped = false;
    cpu->stop = false;
    qemu_cpu_kick(cpu);
}
1001

    
1002
void resume_all_vcpus(void)
1003
{
1004
    CPUArchState *penv = first_cpu;
1005

    
1006
    qemu_clock_enable(vm_clock, true);
1007
    while (penv) {
1008
        CPUState *pcpu = ENV_GET_CPU(penv);
1009
        cpu_resume(pcpu);
1010
        penv = penv->next_cpu;
1011
    }
1012
}
1013

    
1014
/*
 * Attach @cpu to the TCG execution thread.  All CPUs share one thread and
 * one halt condition under TCG: the first call creates them and waits
 * until the thread reports the CPU as created; later calls just reuse
 * them.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    if (tcg_cpu_thread) {
        /* The shared thread already exists. */
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
        return;
    }

    cpu->thread = g_malloc0(sizeof(*cpu->thread));
    cpu->halt_cond = g_malloc0(sizeof(*cpu->halt_cond));
    qemu_cond_init(cpu->halt_cond);
    tcg_halt_cond = cpu->halt_cond;
    qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
    tcg_cpu_thread = cpu->thread;
}
1036

    
1037
/*
 * Create a dedicated thread running qemu_kvm_cpu_thread_fn for the CPU
 * that owns @env, and wait on qemu_cpu_cond until cpu->created is set.
 */
static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    /* per-CPU thread handle and halt condition */
    vcpu->thread = g_malloc0(sizeof(QemuThread));
    vcpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(vcpu->halt_cond);

    qemu_thread_create(vcpu->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (!vcpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1050

    
1051
/*
 * Create a dedicated thread running qemu_dummy_cpu_thread_fn for the CPU
 * that owns @env, and wait on qemu_cpu_cond until cpu->created is set.
 */
static void qemu_dummy_start_vcpu(CPUArchState *env)
{
    CPUState *vcpu = ENV_GET_CPU(env);

    /* per-CPU thread handle and halt condition */
    vcpu->thread = g_malloc0(sizeof(QemuThread));
    vcpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(vcpu->halt_cond);

    qemu_thread_create(vcpu->thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (!vcpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1064

    
1065
void qemu_init_vcpu(void *_env)
1066
{
1067
    CPUArchState *env = _env;
1068
    CPUState *cpu = ENV_GET_CPU(env);
1069

    
1070
    cpu->nr_cores = smp_cores;
1071
    cpu->nr_threads = smp_threads;
1072
    cpu->stopped = true;
1073
    if (kvm_enabled()) {
1074
        qemu_kvm_start_vcpu(env);
1075
    } else if (tcg_enabled()) {
1076
        qemu_tcg_init_vcpu(cpu);
1077
    } else {
1078
        qemu_dummy_start_vcpu(env);
1079
    }
1080
}
1081

    
1082
void cpu_stop_current(void)
1083
{
1084
    if (cpu_single_env) {
1085
        CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
1086
        cpu_single_cpu->stop = false;
1087
        cpu_single_cpu->stopped = true;
1088
        cpu_exit(cpu_single_cpu);
1089
        qemu_cond_signal(&qemu_pause_cond);
1090
    }
1091
}
1092

    
1093
/*
 * Stop the VM and move it to run state @state.
 *
 * Outside a vCPU thread the stop is carried out directly by do_vm_stop().
 * Inside a vCPU thread the stop is only requested and the current CPU is
 * stopped.
 */
void vm_stop(RunState state)
{
    if (!qemu_in_vcpu_thread()) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
1106

    
1107
/* does a state transition even if the VM is already stopped,
1108
   current state is forgotten forever */
1109
void vm_stop_force_state(RunState state)
1110
{
1111
    if (runstate_is_running()) {
1112
        vm_stop(state);
1113
    } else {
1114
        runstate_set(state);
1115
    }
1116
}
1117

    
1118
/*
 * Run @env through cpu_exec() once and return its result.
 *
 * With use_icount set, an instruction budget derived from the vm_clock
 * deadline is loaded before execution: at most 0xffff goes into
 * icount_decr.u16.low and the remainder into icount_extra.  Whatever is
 * left unused afterwards is folded back into qemu_icount.
 * With CONFIG_PROFILER, the time spent in cpu_exec() is added to
 * qemu_time.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti = profile_getclock();
#endif

    if (use_icount) {
        int64_t budget;
        int low;

        /* take back anything still outstanding from a previous run */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        budget = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += budget;

        /* the 16-bit decrementer holds at most 0xffff; the rest is extra */
        if (budget > 0xffff) {
            low = 0xffff;
        } else {
            low = budget;
        }
        budget -= low;
        env->icount_decr.u16.low = low;
        env->icount_extra = budget;
    }

    ret = cpu_exec(env);

#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif

    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }

    return ret;
}
1155

    
1156
static void tcg_exec_all(void)
1157
{
1158
    int r;
1159

    
1160
    /* Account partial waits to the vm_clock.  */
1161
    qemu_clock_warp(vm_clock);
1162

    
1163
    if (next_cpu == NULL) {
1164
        next_cpu = first_cpu;
1165
    }
1166
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1167
        CPUArchState *env = next_cpu;
1168
        CPUState *cpu = ENV_GET_CPU(env);
1169

    
1170
        qemu_clock_enable(vm_clock,
1171
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1172

    
1173
        if (cpu_can_run(cpu)) {
1174
            r = tcg_cpu_exec(env);
1175
            if (r == EXCP_DEBUG) {
1176
                cpu_handle_guest_debug(env);
1177
                break;
1178
            }
1179
        } else if (cpu->stop || cpu->stopped) {
1180
            break;
1181
        }
1182
    }
1183
    exit_request = 0;
1184
}
1185

    
1186
void set_numa_modes(void)
1187
{
1188
    CPUArchState *env;
1189
    CPUState *cpu;
1190
    int i;
1191

    
1192
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1193
        cpu = ENV_GET_CPU(env);
1194
        for (i = 0; i < nb_numa_nodes; i++) {
1195
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1196
                cpu->numa_node = i;
1197
            }
1198
        }
1199
    }
1200
}
1201

    
1202
/*
 * Print the list of CPU models to @f through @cpu_fprintf, using the
 * target's cpu_list macro.  Nothing is printed when the target does not
 * define cpu_list.  @optarg is not used.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list
    cpu_list(f, cpu_fprintf);
#endif
}
1209

    
1210
/*
 * Build the 'query-cpus' result: one CpuInfoList element per CPU, in
 * first_cpu list order.
 *
 * Each CPU's state is synchronized before its index, halted flag and
 * thread id are copied; a target-specific program counter is added for
 * i386, PPC, SPARC and MIPS.  Returns the list head, or NULL when there
 * are no CPUs.  @errp is not written by this function.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;   /* where the next element gets linked */
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CPUState *cpu = ENV_GET_CPU(env);
        CpuInfoList *entry;

        cpu_synchronize_state(cpu);

        entry = g_malloc0(sizeof(*entry));
        entry->value = g_malloc0(sizeof(*entry->value));
        entry->value->CPU = cpu->cpu_index;
        entry->value->current = (env == first_cpu);
        entry->value->halted = cpu->halted;
        entry->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        entry->value->has_pc = true;
        entry->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        entry->value->has_nip = true;
        entry->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        entry->value->has_pc = true;
        entry->value->pc = env->pc;
        entry->value->has_npc = true;
        entry->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        entry->value->has_PC = true;
        entry->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        /* entry->next is already NULL because the element is zero-allocated */
        *tail = entry;
        tail = &entry->next;
    }

    return head;
}
1254

    
1255
/*
 * QMP 'memsave': dump @size bytes of guest memory, starting at address
 * @addr as accessed through a CPU, into the file @filename.
 *
 * @has_cpu / @cpu_index select the CPU; CPU 0 is used when @has_cpu is
 * false.  The file is opened with mode "wb" and written in chunks of at
 * most sizeof(buf) bytes.
 *
 * Errors reported through @errp:
 *  - QERR_INVALID_PARAMETER_VALUE when @cpu_index names no CPU or @size
 *    is negative (checked before the file is opened, so nothing is
 *    created or truncated in that case);
 *  - a file-open error when @filename cannot be opened;
 *  - QERR_IO_ERROR when writing or closing the file fails.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }
    env = cpu->env_ptr;

    /*
     * A negative size would pass the "l > size" clamp below and be
     * truncated into a huge unsigned chunk length, far beyond buf.
     */
    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        /* NOTE(review): the result of this read is not checked -- confirm
         * whether a failed read should abort the dump. */
        cpu_memory_rw_debug(env, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            fclose(f);
            return;
        }
        addr += l;
        size -= l;
    }

    /* buffered data is flushed on close; a failure there is a write error */
    if (fclose(f) != 0) {
        error_set(errp, QERR_IO_ERROR);
    }
}
1298

    
1299
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1300
                  Error **errp)
1301
{
1302
    FILE *f;
1303
    uint32_t l;
1304
    uint8_t buf[1024];
1305

    
1306
    f = fopen(filename, "wb");
1307
    if (!f) {
1308
        error_setg_file_open(errp, errno, filename);
1309
        return;
1310
    }
1311

    
1312
    while (size != 0) {
1313
        l = sizeof(buf);
1314
        if (l > size)
1315
            l = size;
1316
        cpu_physical_memory_rw(addr, buf, l, 0);
1317
        if (fwrite(buf, 1, l, f) != l) {
1318
            error_set(errp, QERR_IO_ERROR);
1319
            goto exit;
1320
        }
1321
        addr += l;
1322
        size -= l;
1323
    }
1324

    
1325
exit:
1326
    fclose(f);
1327
}
1328

    
1329
/*
 * QMP 'inject-nmi': deliver an NMI to every CPU.
 *
 * Only implemented for TARGET_I386: a CPU with an APIC state gets the NMI
 * through apic_deliver_nmi(), any other CPU gets CPU_INTERRUPT_NMI raised
 * directly.  Other targets report QERR_UNSUPPORTED through @errp.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        if (env->apic_state) {
            apic_deliver_nmi(env->apic_state);
        } else {
            cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
        }
        env = env->next_cpu;
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}