Statistics
| Branch: | Revision:

root / cpus.c @ 216fc9a4

History | View | Annotate | Download (33.1 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor.h"
29
#include "sysemu.h"
30
#include "gdbstub.h"
31
#include "dma.h"
32
#include "kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu-thread.h"
36
#include "cpus.h"
37
#include "qtest.h"
38
#include "main-loop.h"
39

    
40
#ifndef _WIN32
41
#include "compatfd.h"
42
#endif
43

    
44
#ifdef CONFIG_LINUX
45

    
46
#include <sys/prctl.h>
47

    
48
#ifndef PR_MCE_KILL
49
#define PR_MCE_KILL 33
50
#endif
51

    
52
#ifndef PR_MCE_KILL_SET
53
#define PR_MCE_KILL_SET 1
54
#endif
55

    
56
#ifndef PR_MCE_KILL_EARLY
57
#define PR_MCE_KILL_EARLY 1
58
#endif
59

    
60
#endif /* CONFIG_LINUX */
61

    
62
static CPUArchState *next_cpu;
63

    
64
/*
 * Tell whether the thread driving @env currently has nothing to do.
 *
 * A pending stop request or queued work item always makes the CPU busy.
 * Otherwise a stopped CPU, or any CPU while the VM is not running, is idle.
 * A running CPU is idle only when it is halted, has no work according to
 * qemu_cpu_has_work(), and the irqchip is not handled in the kernel.
 */
static bool cpu_thread_is_idle(CPUArchState *env)
{
    if (env->stop || env->queued_work_first) {
        return false;
    }
    if (env->stopped || !runstate_is_running()) {
        return true;
    }
    return env->halted && !qemu_cpu_has_work(env) && !kvm_irqchip_in_kernel();
}
77

    
78
static bool all_cpu_threads_idle(void)
79
{
80
    CPUArchState *env;
81

    
82
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
83
        if (!cpu_thread_is_idle(env)) {
84
            return false;
85
        }
86
    }
87
    return true;
88
}
89

    
90
/***********************************************************/
91
/* guest cycle counter */
92

    
93
/* Conversion factor from emulated instructions to virtual clock ticks.  */
94
static int icount_time_shift;
95
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
96
#define MAX_ICOUNT_SHIFT 10
97
/* Compensate for varying guest execution speed.  */
98
static int64_t qemu_icount_bias;
99
static QEMUTimer *icount_rt_timer;
100
static QEMUTimer *icount_vm_timer;
101
static QEMUTimer *icount_warp_timer;
102
static int64_t vm_clock_warp_start;
103
static int64_t qemu_icount;
104

    
105
/* Bookkeeping for the host tick counter and host clock as seen by the guest. */
typedef struct TimersState {
    /* Last raw value returned by cpu_get_real_ticks(). */
    int64_t cpu_ticks_prev;
    /* Added to the raw tick counter; holds the frozen value while disabled. */
    int64_t cpu_ticks_offset;
    /* Added to get_clock(); holds the frozen value while disabled. */
    int64_t cpu_clock_offset;
    /* Non-zero while ticks and clock are running. */
    int32_t cpu_ticks_enabled;
    /* Not used by the timer code here; listed in vmstate_timers. */
    int64_t dummy;
} TimersState;
112

    
113
TimersState timers_state;
114

    
115
/* Return the virtual CPU time, based on the instruction counter.  */
116
int64_t cpu_get_icount(void)
117
{
118
    int64_t icount;
119
    CPUArchState *env = cpu_single_env;
120

    
121
    icount = qemu_icount;
122
    if (env) {
123
        if (!can_do_io(env)) {
124
            fprintf(stderr, "Bad clock read\n");
125
        }
126
        icount -= (env->icount_decr.u16.low + env->icount_extra);
127
    }
128
    return qemu_icount_bias + (icount << icount_time_shift);
129
}
130

    
131
/* return the host CPU cycle counter and handle stop/restart */
132
int64_t cpu_get_ticks(void)
133
{
134
    if (use_icount) {
135
        return cpu_get_icount();
136
    }
137
    if (!timers_state.cpu_ticks_enabled) {
138
        return timers_state.cpu_ticks_offset;
139
    } else {
140
        int64_t ticks;
141
        ticks = cpu_get_real_ticks();
142
        if (timers_state.cpu_ticks_prev > ticks) {
143
            /* Note: non increasing ticks may happen if the host uses
144
               software suspend */
145
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
146
        }
147
        timers_state.cpu_ticks_prev = ticks;
148
        return ticks + timers_state.cpu_ticks_offset;
149
    }
150
}
151

    
152
/* return the host CPU monotonic timer and handle stop/restart */
153
int64_t cpu_get_clock(void)
154
{
155
    int64_t ti;
156
    if (!timers_state.cpu_ticks_enabled) {
157
        return timers_state.cpu_clock_offset;
158
    } else {
159
        ti = get_clock();
160
        return ti + timers_state.cpu_clock_offset;
161
    }
162
}
163

    
164
/* enable cpu_get_ticks() */
165
void cpu_enable_ticks(void)
166
{
167
    if (!timers_state.cpu_ticks_enabled) {
168
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
169
        timers_state.cpu_clock_offset -= get_clock();
170
        timers_state.cpu_ticks_enabled = 1;
171
    }
172
}
173

    
174
/* disable cpu_get_ticks() : the clock is stopped. You must not call
175
   cpu_get_ticks() after that.  */
176
void cpu_disable_ticks(void)
177
{
178
    if (timers_state.cpu_ticks_enabled) {
179
        timers_state.cpu_ticks_offset = cpu_get_ticks();
180
        timers_state.cpu_clock_offset = cpu_get_clock();
181
        timers_state.cpu_ticks_enabled = 0;
182
    }
183
}
184

    
185
/* Correlation between real and virtual time is always going to be
186
   fairly approximate, so ignore small variation.
187
   When the guest is idle real and virtual time will be aligned in
188
   the IO wait loop.  */
189
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
190

    
191
static void icount_adjust(void)
192
{
193
    int64_t cur_time;
194
    int64_t cur_icount;
195
    int64_t delta;
196
    static int64_t last_delta;
197
    /* If the VM is not running, then do nothing.  */
198
    if (!runstate_is_running()) {
199
        return;
200
    }
201
    cur_time = cpu_get_clock();
202
    cur_icount = qemu_get_clock_ns(vm_clock);
203
    delta = cur_icount - cur_time;
204
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
205
    if (delta > 0
206
        && last_delta + ICOUNT_WOBBLE < delta * 2
207
        && icount_time_shift > 0) {
208
        /* The guest is getting too far ahead.  Slow time down.  */
209
        icount_time_shift--;
210
    }
211
    if (delta < 0
212
        && last_delta - ICOUNT_WOBBLE > delta * 2
213
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
214
        /* The guest is getting too far behind.  Speed time up.  */
215
        icount_time_shift++;
216
    }
217
    last_delta = delta;
218
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
219
}
220

    
221
static void icount_adjust_rt(void *opaque)
222
{
223
    qemu_mod_timer(icount_rt_timer,
224
                   qemu_get_clock_ms(rt_clock) + 1000);
225
    icount_adjust();
226
}
227

    
228
static void icount_adjust_vm(void *opaque)
229
{
230
    qemu_mod_timer(icount_vm_timer,
231
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
232
    icount_adjust();
233
}
234

    
235
static int64_t qemu_icount_round(int64_t count)
236
{
237
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
238
}
239

    
240
static void icount_warp_rt(void *opaque)
241
{
242
    if (vm_clock_warp_start == -1) {
243
        return;
244
    }
245

    
246
    if (runstate_is_running()) {
247
        int64_t clock = qemu_get_clock_ns(rt_clock);
248
        int64_t warp_delta = clock - vm_clock_warp_start;
249
        if (use_icount == 1) {
250
            qemu_icount_bias += warp_delta;
251
        } else {
252
            /*
253
             * In adaptive mode, do not let the vm_clock run too
254
             * far ahead of real time.
255
             */
256
            int64_t cur_time = cpu_get_clock();
257
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
258
            int64_t delta = cur_time - cur_icount;
259
            qemu_icount_bias += MIN(warp_delta, delta);
260
        }
261
        if (qemu_clock_expired(vm_clock)) {
262
            qemu_notify_event();
263
        }
264
    }
265
    vm_clock_warp_start = -1;
266
}
267

    
268
void qtest_clock_warp(int64_t dest)
269
{
270
    int64_t clock = qemu_get_clock_ns(vm_clock);
271
    assert(qtest_enabled());
272
    while (clock < dest) {
273
        int64_t deadline = qemu_clock_deadline(vm_clock);
274
        int64_t warp = MIN(dest - clock, deadline);
275
        qemu_icount_bias += warp;
276
        qemu_run_timers(vm_clock);
277
        clock = qemu_get_clock_ns(vm_clock);
278
    }
279
    qemu_notify_event();
280
}
281

    
282
/*
 * Start accounting real time to the vm_clock while all CPUs are idle.
 *
 * Only meaningful for @clock == vm_clock with icount enabled; any other
 * call returns immediately.
 */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
     * ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);

    /* Nothing to warp towards: a CPU is busy or no vm_clock timer exists. */
    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline <= 0) {
        /* A timer is already due: just wake the main loop. */
        qemu_notify_event();
        return;
    }

    /*
     * Ensure the vm_clock proceeds even when the virtual CPU goes to
     * sleep.  Otherwise, the CPU might be waiting for a future timer
     * interrupt to wake it up, but the interrupt never comes because
     * the vCPU isn't running any insns and thus doesn't advance the
     * vm_clock.
     *
     * An extreme solution for this problem would be to never let VCPUs
     * sleep in icount mode if there is a pending vm_clock timer; rather
     * time could just advance to the next vm_clock event.  Instead, we
     * do stop VCPUs and only advance vm_clock after some "real" time,
     * (related to the time left until the next event) has passed.  This
     * rt_clock timer will do this.  This avoids that the warps are too
     * visible externally---for example, you will not be sending network
     * packets continuously instead of every 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
}
337

    
338
static const VMStateDescription vmstate_timers = {
339
    .name = "timer",
340
    .version_id = 2,
341
    .minimum_version_id = 1,
342
    .minimum_version_id_old = 1,
343
    .fields      = (VMStateField[]) {
344
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
345
        VMSTATE_INT64(dummy, TimersState),
346
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
347
        VMSTATE_END_OF_LIST()
348
    }
349
};
350

    
351
void configure_icount(const char *option)
352
{
353
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
354
    if (!option) {
355
        return;
356
    }
357

    
358
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
359
    if (strcmp(option, "auto") != 0) {
360
        icount_time_shift = strtol(option, NULL, 0);
361
        use_icount = 1;
362
        return;
363
    }
364

    
365
    use_icount = 2;
366

    
367
    /* 125MIPS seems a reasonable initial guess at the guest speed.
368
       It will be corrected fairly quickly anyway.  */
369
    icount_time_shift = 3;
370

    
371
    /* Have both realtime and virtual time triggers for speed adjustment.
372
       The realtime trigger catches emulated time passing too slowly,
373
       the virtual time trigger catches emulated time passing too fast.
374
       Realtime triggers occur even when idle, so use them less frequently
375
       than VM triggers.  */
376
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
377
    qemu_mod_timer(icount_rt_timer,
378
                   qemu_get_clock_ms(rt_clock) + 1000);
379
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
380
    qemu_mod_timer(icount_vm_timer,
381
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
382
}
383

    
384
/***********************************************************/
385
void hw_error(const char *fmt, ...)
386
{
387
    va_list ap;
388
    CPUArchState *env;
389

    
390
    va_start(ap, fmt);
391
    fprintf(stderr, "qemu: hardware error: ");
392
    vfprintf(stderr, fmt, ap);
393
    fprintf(stderr, "\n");
394
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
395
        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
396
#ifdef TARGET_I386
397
        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
398
#else
399
        cpu_dump_state(env, stderr, fprintf, 0);
400
#endif
401
    }
402
    va_end(ap);
403
    abort();
404
}
405

    
406
void cpu_synchronize_all_states(void)
407
{
408
    CPUArchState *cpu;
409

    
410
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
411
        cpu_synchronize_state(cpu);
412
    }
413
}
414

    
415
void cpu_synchronize_all_post_reset(void)
416
{
417
    CPUArchState *cpu;
418

    
419
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
420
        cpu_synchronize_post_reset(cpu);
421
    }
422
}
423

    
424
void cpu_synchronize_all_post_init(void)
425
{
426
    CPUArchState *cpu;
427

    
428
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
429
        cpu_synchronize_post_init(cpu);
430
    }
431
}
432

    
433
/*
 * Return 1 if @env is not executing, either because the VM as a whole is
 * not running or because this CPU is marked stopped; 0 otherwise.
 */
int cpu_is_stopped(CPUArchState *env)
{
    if (!runstate_is_running()) {
        return 1;
    }
    return env->stopped ? 1 : 0;
}
437

    
438
/*
 * Stop a running VM and move it to run state @state.
 *
 * Stops the tick counters, pauses all VCPUs, switches the run state,
 * notifies state listeners, drains and flushes block devices and emits
 * the STOP monitor event.  No-op if the VM is not running.
 */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
450

    
451
/*
 * Return 1 if @env may execute guest code now: no stop request pending,
 * not stopped, and the VM is running.  Return 0 otherwise.
 */
static int cpu_can_run(CPUArchState *env)
{
    return !(env->stop || env->stopped || !runstate_is_running());
}
461

    
462
/*
 * Handle a debug exit from guest execution: record @env as the CPU the
 * gdbstub should report, raise a system debug request, and mark the CPU
 * stopped.
 */
static void cpu_handle_guest_debug(CPUArchState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();

    env->stopped = 1;
}
468

    
469
static void cpu_signal(int sig)
470
{
471
    if (cpu_single_env) {
472
        cpu_exit(cpu_single_env);
473
    }
474
    exit_request = 1;
475
}
476

    
477
#ifdef CONFIG_LINUX
478
static void sigbus_reraise(void)
479
{
480
    sigset_t set;
481
    struct sigaction action;
482

    
483
    memset(&action, 0, sizeof(action));
484
    action.sa_handler = SIG_DFL;
485
    if (!sigaction(SIGBUS, &action, NULL)) {
486
        raise(SIGBUS);
487
        sigemptyset(&set);
488
        sigaddset(&set, SIGBUS);
489
        sigprocmask(SIG_UNBLOCK, &set, NULL);
490
    }
491
    perror("Failed to re-raise SIGBUS!\n");
492
    abort();
493
}
494

    
495
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
496
                           void *ctx)
497
{
498
    if (kvm_on_sigbus(siginfo->ssi_code,
499
                      (void *)(intptr_t)siginfo->ssi_addr)) {
500
        sigbus_reraise();
501
    }
502
}
503

    
504
static void qemu_init_sigbus(void)
505
{
506
    struct sigaction action;
507

    
508
    memset(&action, 0, sizeof(action));
509
    action.sa_flags = SA_SIGINFO;
510
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
511
    sigaction(SIGBUS, &action, NULL);
512

    
513
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
514
}
515

    
516
/*
 * Consume all pending SIG_IPI and SIGBUS signals for the calling thread
 * without blocking.
 *
 * Each SIGBUS is passed to kvm_on_sigbus_vcpu() for @env and re-raised if
 * that returns non-zero.  The loop repeats while either signal is still
 * pending.  Unexpected failures of sigtimedwait() or sigpending() are
 * fatal.
 */
static void qemu_kvm_eat_signals(CPUArchState *env)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t wanted;
    sigset_t pending;
    int rc;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    do {
        /* Zero timeout: poll rather than wait. */
        rc = sigtimedwait(&wanted, &info, &no_wait);
        if (rc == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (rc == SIGBUS) {
            if (kvm_on_sigbus_vcpu(env, info.si_code, info.si_addr)) {
                sigbus_reraise();
            }
        }

        rc = sigpending(&pending);
        if (rc == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&pending, SIG_IPI) || sigismember(&pending, SIGBUS));
}
552

    
553
#else /* !CONFIG_LINUX */
554

    
555
/* Without CONFIG_LINUX there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
558

    
559
/* Without CONFIG_LINUX no signals are drained for @env. */
static void qemu_kvm_eat_signals(CPUArchState *env)
{
    /* intentionally empty */
}
562
#endif /* !CONFIG_LINUX */
563

    
564
#ifndef _WIN32
565
/* Signal handler that does nothing; installed for SIG_IPI on KVM threads. */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
568

    
569
/*
 * Set up signal handling for a KVM VCPU thread.
 *
 * Installs the no-op handler for SIG_IPI, then passes the thread's
 * current signal mask, with SIG_IPI and SIGBUS removed, to
 * kvm_set_signal_mask() for @env.  Exits the process on failure.
 */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    struct sigaction ipi_action;
    sigset_t guest_mask;
    int rc;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* A NULL new set only queries the current mask. */
    pthread_sigmask(SIG_BLOCK, NULL, &guest_mask);
    sigdelset(&guest_mask, SIG_IPI);
    sigdelset(&guest_mask, SIGBUS);

    rc = kvm_set_signal_mask(env, &guest_mask);
    if (rc) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-rc));
        exit(1);
    }
}
588

    
589
static void qemu_tcg_init_cpu_signals(void)
590
{
591
    sigset_t set;
592
    struct sigaction sigact;
593

    
594
    memset(&sigact, 0, sizeof(sigact));
595
    sigact.sa_handler = cpu_signal;
596
    sigaction(SIG_IPI, &sigact, NULL);
597

    
598
    sigemptyset(&set);
599
    sigaddset(&set, SIG_IPI);
600
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
601
}
602

    
603
#else /* _WIN32 */
604
/* Win32 variant: must never be called; aborts unconditionally. */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    abort();
}
608

    
609
/* Win32 variant: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
612
#endif /* _WIN32 */
613

    
614
QemuMutex qemu_global_mutex;
615
static QemuCond qemu_io_proceeded_cond;
616
static bool iothread_requesting_mutex;
617

    
618
static QemuThread io_thread;
619

    
620
static QemuThread *tcg_cpu_thread;
621
static QemuCond *tcg_halt_cond;
622

    
623
/* cpu creation */
624
static QemuCond qemu_cpu_cond;
625
/* system init */
626
static QemuCond qemu_pause_cond;
627
static QemuCond qemu_work_cond;
628

    
629
void qemu_init_cpu_loop(void)
630
{
631
    qemu_init_sigbus();
632
    qemu_cond_init(&qemu_cpu_cond);
633
    qemu_cond_init(&qemu_pause_cond);
634
    qemu_cond_init(&qemu_work_cond);
635
    qemu_cond_init(&qemu_io_proceeded_cond);
636
    qemu_mutex_init(&qemu_global_mutex);
637

    
638
    qemu_thread_get_self(&io_thread);
639
}
640

    
641
/*
 * Run @func(@data) in the context of the thread that drives @env and
 * wait for it to finish.
 *
 * When called from that thread the function is invoked directly.
 * Otherwise a work item living on this stack frame is appended to the
 * CPU's queue, the CPU is kicked, and the caller waits on qemu_work_cond
 * (with qemu_global_mutex) until the item is marked done.
 */
void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item item;

    if (qemu_cpu_is_self(env)) {
        func(data);
        return;
    }

    item.func = func;
    item.data = data;
    item.next = NULL;
    item.done = false;

    /* Append to the tail of the per-CPU work list. */
    if (env->queued_work_first) {
        env->queued_work_last->next = &item;
    } else {
        env->queued_work_first = &item;
    }
    env->queued_work_last = &item;

    qemu_cpu_kick(env);
    while (!item.done) {
        /* Restore cpu_single_env after the wait. */
        CPUArchState *saved_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
669

    
670
/*
 * Execute and complete every work item queued on @env, in order, then
 * wake all threads waiting on qemu_work_cond.  Returns immediately,
 * without a broadcast, when the queue is empty.
 */
static void flush_queued_work(CPUArchState *env)
{
    struct qemu_work_item *item = env->queued_work_first;

    if (item == NULL) {
        return;
    }

    while (item != NULL) {
        /* Unlink before running; the head is re-read after each item. */
        env->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
        item = env->queued_work_first;
    }
    env->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
686

    
687
/*
 * Per-CPU housekeeping done after waiting for an I/O event: acknowledge
 * a pending stop request (signalling qemu_pause_cond), run queued work,
 * and clear the thread_kicked flag.
 */
static void qemu_wait_io_event_common(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);

    if (env->stop) {
        /* Convert the request into the stopped state. */
        env->stop = 0;
        env->stopped = 1;
        qemu_cond_signal(&qemu_pause_cond);
    }

    flush_queued_work(env);

    cpu->thread_kicked = false;
}
699

    
700
static void qemu_tcg_wait_io_event(void)
701
{
702
    CPUArchState *env;
703

    
704
    while (all_cpu_threads_idle()) {
705
       /* Start accounting real time to the virtual clock if the CPUs
706
          are idle.  */
707
        qemu_clock_warp(vm_clock);
708
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
709
    }
710

    
711
    while (iothread_requesting_mutex) {
712
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
713
    }
714

    
715
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
716
        qemu_wait_io_event_common(env);
717
    }
718
}
719

    
720
/*
 * Wait step of a KVM VCPU thread: sleep on the CPU's halt condition
 * while it is idle, then drain pending signals and run the common
 * post-wait handling.
 */
static void qemu_kvm_wait_io_event(CPUArchState *env)
{
    for (;;) {
        if (!cpu_thread_is_idle(env)) {
            break;
        }
        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(env);
    qemu_wait_io_event_common(env);
}
729

    
730
static void *qemu_kvm_cpu_thread_fn(void *arg)
731
{
732
    CPUArchState *env = arg;
733
    CPUState *cpu = ENV_GET_CPU(env);
734
    int r;
735

    
736
    qemu_mutex_lock(&qemu_global_mutex);
737
    qemu_thread_get_self(cpu->thread);
738
    env->thread_id = qemu_get_thread_id();
739
    cpu_single_env = env;
740

    
741
    r = kvm_init_vcpu(env);
742
    if (r < 0) {
743
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
744
        exit(1);
745
    }
746

    
747
    qemu_kvm_init_cpu_signals(env);
748

    
749
    /* signal CPU creation */
750
    env->created = 1;
751
    qemu_cond_signal(&qemu_cpu_cond);
752

    
753
    while (1) {
754
        if (cpu_can_run(env)) {
755
            r = kvm_cpu_exec(env);
756
            if (r == EXCP_DEBUG) {
757
                cpu_handle_guest_debug(env);
758
            }
759
        }
760
        qemu_kvm_wait_io_event(env);
761
    }
762

    
763
    return NULL;
764
}
765

    
766
static void *qemu_dummy_cpu_thread_fn(void *arg)
767
{
768
#ifdef _WIN32
769
    fprintf(stderr, "qtest is not supported under Windows\n");
770
    exit(1);
771
#else
772
    CPUArchState *env = arg;
773
    CPUState *cpu = ENV_GET_CPU(env);
774
    sigset_t waitset;
775
    int r;
776

    
777
    qemu_mutex_lock_iothread();
778
    qemu_thread_get_self(cpu->thread);
779
    env->thread_id = qemu_get_thread_id();
780

    
781
    sigemptyset(&waitset);
782
    sigaddset(&waitset, SIG_IPI);
783

    
784
    /* signal CPU creation */
785
    env->created = 1;
786
    qemu_cond_signal(&qemu_cpu_cond);
787

    
788
    cpu_single_env = env;
789
    while (1) {
790
        cpu_single_env = NULL;
791
        qemu_mutex_unlock_iothread();
792
        do {
793
            int sig;
794
            r = sigwait(&waitset, &sig);
795
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
796
        if (r == -1) {
797
            perror("sigwait");
798
            exit(1);
799
        }
800
        qemu_mutex_lock_iothread();
801
        cpu_single_env = env;
802
        qemu_wait_io_event_common(env);
803
    }
804

    
805
    return NULL;
806
#endif
807
}
808

    
809
static void tcg_exec_all(void);
810

    
811
static void *qemu_tcg_cpu_thread_fn(void *arg)
812
{
813
    CPUArchState *env = arg;
814
    CPUState *cpu = ENV_GET_CPU(env);
815

    
816
    qemu_tcg_init_cpu_signals();
817
    qemu_thread_get_self(cpu->thread);
818

    
819
    /* signal CPU creation */
820
    qemu_mutex_lock(&qemu_global_mutex);
821
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
822
        env->thread_id = qemu_get_thread_id();
823
        env->created = 1;
824
    }
825
    qemu_cond_signal(&qemu_cpu_cond);
826

    
827
    /* wait for initial kick-off after machine start */
828
    while (first_cpu->stopped) {
829
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
830

    
831
        /* process any pending work */
832
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
833
            qemu_wait_io_event_common(env);
834
        }
835
    }
836

    
837
    while (1) {
838
        tcg_exec_all();
839
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
840
            qemu_notify_event();
841
        }
842
        qemu_tcg_wait_io_event();
843
    }
844

    
845
    return NULL;
846
}
847

    
848
/*
 * Interrupt the thread that drives @env.
 *
 * On POSIX hosts SIG_IPI is sent with pthread_kill(); failure is fatal.
 * On Win32 the target thread is suspended, cpu_signal() is called from
 * the calling thread, and the target is resumed; nothing is done when
 * the caller is the target thread itself.
 */
static void qemu_cpu_kick_thread(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        /* Terminate the line so the message is not run together with
           whatever is printed next. */
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(env)) {
        SuspendThread(cpu->hThread);
        cpu_signal(0);
        ResumeThread(cpu->hThread);
    }
#endif
}
867

    
868
void qemu_cpu_kick(void *_env)
869
{
870
    CPUArchState *env = _env;
871
    CPUState *cpu = ENV_GET_CPU(env);
872

    
873
    qemu_cond_broadcast(env->halt_cond);
874
    if (!tcg_enabled() && !cpu->thread_kicked) {
875
        qemu_cpu_kick_thread(env);
876
        cpu->thread_kicked = true;
877
    }
878
}
879

    
880
void qemu_cpu_kick_self(void)
881
{
882
#ifndef _WIN32
883
    assert(cpu_single_env);
884
    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
885

    
886
    if (!cpu_single_cpu->thread_kicked) {
887
        qemu_cpu_kick_thread(cpu_single_env);
888
        cpu_single_cpu->thread_kicked = true;
889
    }
890
#else
891
    abort();
892
#endif
893
}
894

    
895
int qemu_cpu_is_self(void *_env)
896
{
897
    CPUArchState *env = _env;
898
    CPUState *cpu = ENV_GET_CPU(env);
899

    
900
    return qemu_thread_is_self(cpu->thread);
901
}
902

    
903
void qemu_mutex_lock_iothread(void)
904
{
905
    if (!tcg_enabled()) {
906
        qemu_mutex_lock(&qemu_global_mutex);
907
    } else {
908
        iothread_requesting_mutex = true;
909
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
910
            qemu_cpu_kick_thread(first_cpu);
911
            qemu_mutex_lock(&qemu_global_mutex);
912
        }
913
        iothread_requesting_mutex = false;
914
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
915
    }
916
}
917

    
918
void qemu_mutex_unlock_iothread(void)
919
{
920
    qemu_mutex_unlock(&qemu_global_mutex);
921
}
922

    
923
static int all_vcpus_paused(void)
924
{
925
    CPUArchState *penv = first_cpu;
926

    
927
    while (penv) {
928
        if (!penv->stopped) {
929
            return 0;
930
        }
931
        penv = penv->next_cpu;
932
    }
933

    
934
    return 1;
935
}
936

    
937
void pause_all_vcpus(void)
938
{
939
    CPUArchState *penv = first_cpu;
940

    
941
    qemu_clock_enable(vm_clock, false);
942
    while (penv) {
943
        penv->stop = 1;
944
        qemu_cpu_kick(penv);
945
        penv = penv->next_cpu;
946
    }
947

    
948
    if (!qemu_thread_is_self(&io_thread)) {
949
        cpu_stop_current();
950
        if (!kvm_enabled()) {
951
            while (penv) {
952
                penv->stop = 0;
953
                penv->stopped = 1;
954
                penv = penv->next_cpu;
955
            }
956
            return;
957
        }
958
    }
959

    
960
    while (!all_vcpus_paused()) {
961
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
962
        penv = first_cpu;
963
        while (penv) {
964
            qemu_cpu_kick(penv);
965
            penv = penv->next_cpu;
966
        }
967
    }
968
}
969

    
970
void resume_all_vcpus(void)
971
{
972
    CPUArchState *penv = first_cpu;
973

    
974
    qemu_clock_enable(vm_clock, true);
975
    while (penv) {
976
        penv->stop = 0;
977
        penv->stopped = 0;
978
        qemu_cpu_kick(penv);
979
        penv = penv->next_cpu;
980
    }
981
}
982

    
983
static void qemu_tcg_init_vcpu(void *_env)
984
{
985
    CPUArchState *env = _env;
986
    CPUState *cpu = ENV_GET_CPU(env);
987

    
988
    /* share a single thread for all cpus with TCG */
989
    if (!tcg_cpu_thread) {
990
        cpu->thread = g_malloc0(sizeof(QemuThread));
991
        env->halt_cond = g_malloc0(sizeof(QemuCond));
992
        qemu_cond_init(env->halt_cond);
993
        tcg_halt_cond = env->halt_cond;
994
        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, env,
995
                           QEMU_THREAD_JOINABLE);
996
#ifdef _WIN32
997
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
998
#endif
999
        while (env->created == 0) {
1000
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1001
        }
1002
        tcg_cpu_thread = cpu->thread;
1003
    } else {
1004
        cpu->thread = tcg_cpu_thread;
1005
        env->halt_cond = tcg_halt_cond;
1006
    }
1007
}
1008

    
1009
/*
 * Create the dedicated thread for a KVM VCPU: allocate its thread object
 * and halt condition, start qemu_kvm_cpu_thread_fn(), and wait on
 * qemu_cpu_cond until the CPU is flagged as created.
 */
static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);

    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);

    cpu->thread = g_malloc0(sizeof(QemuThread));
    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (!env->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1022

    
1023
/*
 * Create the thread for a dummy VCPU: allocate its thread object and
 * halt condition, start qemu_dummy_cpu_thread_fn(), and wait on
 * qemu_cpu_cond until the CPU is flagged as created.
 */
static void qemu_dummy_start_vcpu(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);

    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);

    cpu->thread = g_malloc0(sizeof(QemuThread));
    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);

    while (!env->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1036

    
1037
void qemu_init_vcpu(void *_env)
1038
{
1039
    CPUArchState *env = _env;
1040

    
1041
    env->nr_cores = smp_cores;
1042
    env->nr_threads = smp_threads;
1043
    env->stopped = 1;
1044
    if (kvm_enabled()) {
1045
        qemu_kvm_start_vcpu(env);
1046
    } else if (tcg_enabled()) {
1047
        qemu_tcg_init_vcpu(env);
1048
    } else {
1049
        qemu_dummy_start_vcpu(env);
1050
    }
1051
}
1052

    
1053
void cpu_stop_current(void)
1054
{
1055
    if (cpu_single_env) {
1056
        cpu_single_env->stop = 0;
1057
        cpu_single_env->stopped = 1;
1058
        cpu_exit(cpu_single_env);
1059
        qemu_cond_signal(&qemu_pause_cond);
1060
    }
1061
}
1062

    
1063
/*
 * Stop the VM and move it to run state @state.
 *
 * On the I/O thread the stop is carried out immediately via do_vm_stop().
 * On any other thread the stop is only requested, and the current CPU is
 * stopped so that the request can be acted on.
 */
void vm_stop(RunState state)
{
    if (qemu_thread_is_self(&io_thread)) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
1076

    
1077
/* does a state transition even if the VM is already stopped,
1078
   current state is forgotten forever */
1079
void vm_stop_force_state(RunState state)
1080
{
1081
    if (runstate_is_running()) {
1082
        vm_stop(state);
1083
    } else {
1084
        runstate_set(state);
1085
    }
1086
}
1087

    
1088
/*
 * Run one slice of guest code on @env via cpu_exec().
 *
 * With use_icount set, the slice is bracketed by instruction-count
 * bookkeeping.  Before the run, any unused budget is removed from
 * qemu_icount and a new budget is derived from the vm_clock deadline.  After
 * the run, whatever was not executed is folded back.
 *
 * With CONFIG_PROFILER, the time spent in cpu_exec() is added to qemu_time.
 *
 * Returns the value returned by cpu_exec().
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti = profile_getclock();
#endif

    if (use_icount) {
        int64_t budget;
        int low;

        /* Drop whatever was left over from the previous slice. */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        budget = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += budget;

        /*
         * The 16-bit decrementer takes at most 0xffff of the budget;
         * the remainder is parked in icount_extra.
         */
        if (budget > 0xffff) {
            low = 0xffff;
        } else {
            low = budget;
        }
        env->icount_decr.u16.low = low;
        env->icount_extra = budget - low;
    }

    ret = cpu_exec(env);

#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif

    if (use_icount) {
        /*
         * Give back the instructions that were budgeted but not executed.
         * Zeroing the whole 32-bit word also clears the interrupt flag.
         */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }

    return ret;
}
1125

    
1126
static void tcg_exec_all(void)
1127
{
1128
    int r;
1129

    
1130
    /* Account partial waits to the vm_clock.  */
1131
    qemu_clock_warp(vm_clock);
1132

    
1133
    if (next_cpu == NULL) {
1134
        next_cpu = first_cpu;
1135
    }
1136
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1137
        CPUArchState *env = next_cpu;
1138

    
1139
        qemu_clock_enable(vm_clock,
1140
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1141

    
1142
        if (cpu_can_run(env)) {
1143
            r = tcg_cpu_exec(env);
1144
            if (r == EXCP_DEBUG) {
1145
                cpu_handle_guest_debug(env);
1146
                break;
1147
            }
1148
        } else if (env->stop || env->stopped) {
1149
            break;
1150
        }
1151
    }
1152
    exit_request = 0;
1153
}
1154

    
1155
void set_numa_modes(void)
1156
{
1157
    CPUArchState *env;
1158
    int i;
1159

    
1160
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1161
        for (i = 0; i < nb_numa_nodes; i++) {
1162
            if (node_cpumask[i] & (1 << env->cpu_index)) {
1163
                env->numa_node = i;
1164
            }
1165
        }
1166
    }
1167
}
1168

    
1169
void set_cpu_log(const char *optarg)
1170
{
1171
    int mask;
1172
    const CPULogItem *item;
1173

    
1174
    mask = cpu_str_to_log_mask(optarg);
1175
    if (!mask) {
1176
        printf("Log items (comma separated):\n");
1177
        for (item = cpu_log_items; item->mask != 0; item++) {
1178
            printf("%-10s %s\n", item->name, item->help);
1179
        }
1180
        exit(1);
1181
    }
1182
    cpu_set_log(mask);
1183
}
1184

    
1185
/*
 * Set the file name used for CPU logging.
 *
 * @optarg: the file name, forwarded unchanged to cpu_set_log_filename().
 */
void set_cpu_log_filename(const char *optarg)
{
    cpu_set_log_filename(optarg);
}
1189

    
1190
/*
 * Print the CPU models supported by the current target.
 *
 * @f:           stream handed to the target's listing routine
 * @cpu_fprintf: fprintf-style callback handed to the listing routine
 * @optarg:      forwarded only when the target provides cpu_list_id
 *
 * The listing routine is chosen at compile time.  cpu_list_id is preferred,
 * and the deprecated cpu_list is the fallback.  If the target defines
 * neither macro, this function does nothing.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: targets that still lack an xxx_cpu_list need one implemented */
#ifdef cpu_list_id
    cpu_list_id(f, cpu_fprintf, optarg);
#elif defined(cpu_list)
    cpu_list(f, cpu_fprintf); /* deprecated */
#endif
}
1199

    
1200
/*
 * QMP handler that builds a list describing every CPU.
 *
 * Walks the first_cpu list and, after cpu_synchronize_state(), records each
 * CPU's index, halted flag and thread id.  "current" is true only for the
 * CPU at the head of the list.  The program counter fields that are filled
 * in depend on the target the file is compiled for.
 *
 * @errp is not written by this function.
 *
 * Returns the head of a newly allocated list in CPU-list order, or NULL if
 * there are no CPUs.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CpuInfoList *node;

        cpu_synchronize_state(env);

        /* Zeroed allocations: node->next and all has_* flags start clear. */
        node = g_malloc0(sizeof(*node));
        node->value = g_malloc0(sizeof(*node->value));

        node->value->CPU = env->cpu_index;
        node->value->current = (env == first_cpu);
        node->value->halted = env->halted;
        node->value->thread_id = env->thread_id;
#if defined(TARGET_I386)
        node->value->has_pc = true;
        node->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        node->value->has_nip = true;
        node->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        node->value->has_pc = true;
        node->value->pc = env->pc;
        node->value->has_npc = true;
        node->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        node->value->has_PC = true;
        node->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        *tail = node;
        tail = &node->next;
    }

    return head;
}
1243

    
1244
/*
 * QMP handler that dumps @size bytes of guest memory, as accessed through a
 * given CPU starting at @addr, into the file @filename.
 *
 * @addr:      start address passed to cpu_memory_rw_debug()
 * @size:      number of bytes to save; must not be negative
 * @filename:  output file, opened with mode "wb"
 * @has_cpu:   whether @cpu_index was supplied; CPU 0 is used otherwise
 * @cpu_index: index of the CPU whose view of memory is dumped
 * @errp:      receives an error when the CPU index matches no CPU, @size is
 *             negative, the file cannot be opened, or writing/closing the
 *             file fails
 *
 * The data is copied in chunks of at most sizeof(buf) bytes.
 *
 * NOTE(review): the result of cpu_memory_rw_debug() is not checked, so if
 * that call can fail, the chunk written out is whatever buf held -- confirm
 * its return contract and report the failure if appropriate.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    uint8_t buf[1024];
    bool write_failed = false;

    if (!has_cpu) {
        cpu_index = 0;
    }

    for (env = first_cpu; env; env = env->next_cpu) {
        if (cpu_index == env->cpu_index) {
            break;
        }
    }

    if (env == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    /*
     * A negative size used to pass the "l > size" clamp and was then
     * truncated into a huge 32-bit chunk length, overrunning buf.
     */
    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

    while (size > 0) {
        l = sizeof(buf);
        if (size < (int64_t)sizeof(buf)) {
            l = size;
        }
        cpu_memory_rw_debug(env, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            write_failed = true;
            break;
        }
        addr += l;
        size -= l;
    }

    /*
     * fclose() flushes buffered data, so a failure there loses data just
     * like a short fwrite().  The file is always closed; at most one error
     * is reported.
     */
    if (fclose(f) != 0 || write_failed) {
        error_set(errp, QERR_IO_ERROR);
    }
}
1290

    
1291
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1292
                  Error **errp)
1293
{
1294
    FILE *f;
1295
    uint32_t l;
1296
    uint8_t buf[1024];
1297

    
1298
    f = fopen(filename, "wb");
1299
    if (!f) {
1300
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1301
        return;
1302
    }
1303

    
1304
    while (size != 0) {
1305
        l = sizeof(buf);
1306
        if (l > size)
1307
            l = size;
1308
        cpu_physical_memory_rw(addr, buf, l, 0);
1309
        if (fwrite(buf, 1, l, f) != l) {
1310
            error_set(errp, QERR_IO_ERROR);
1311
            goto exit;
1312
        }
1313
        addr += l;
1314
        size -= l;
1315
    }
1316

    
1317
exit:
1318
    fclose(f);
1319
}
1320

    
1321
/*
 * QMP handler that injects an NMI into every CPU.
 *
 * On TARGET_I386 builds, each CPU on the first_cpu list receives the NMI
 * through its APIC when env->apic_state is set, and through
 * cpu_interrupt() with CPU_INTERRUPT_NMI otherwise.  On all other targets
 * @errp is set to QERR_UNSUPPORTED.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        if (env->apic_state) {
            apic_deliver_nmi(env->apic_state);
        } else {
            cpu_interrupt(env, CPU_INTERRUPT_NMI);
        }
        env = env->next_cpu;
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}