Statistics
| Branch: | Revision:

root / cpus.c @ 29b358f9

History | View | Annotate | Download (33.5 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor/monitor.h"
29
#include "sysemu/sysemu.h"
30
#include "exec/gdbstub.h"
31
#include "sysemu/dma.h"
32
#include "sysemu/kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu/thread.h"
36
#include "sysemu/cpus.h"
37
#include "sysemu/qtest.h"
38
#include "qemu/main-loop.h"
39
#include "qemu/bitmap.h"
40

    
41
#ifndef _WIN32
42
#include "qemu/compatfd.h"
43
#endif
44

    
45
#ifdef CONFIG_LINUX
46

    
47
#include <sys/prctl.h>
48

    
49
#ifndef PR_MCE_KILL
50
#define PR_MCE_KILL 33
51
#endif
52

    
53
#ifndef PR_MCE_KILL_SET
54
#define PR_MCE_KILL_SET 1
55
#endif
56

    
57
#ifndef PR_MCE_KILL_EARLY
58
#define PR_MCE_KILL_EARLY 1
59
#endif
60

    
61
#endif /* CONFIG_LINUX */
62

    
63
/* File-scope CPU cursor.  No user of it is visible in this part of the file;
   presumably the next CPU the TCG execution loop will run -- TODO confirm.  */
static CPUArchState *next_cpu;
64

    
65
/*
 * Tell whether the thread serving @cpu currently has nothing to do.
 *
 * A pending stop request or a queued work item always makes the thread
 * busy.  Failing that, a stopped CPU (or any CPU while the VM is not
 * running) is idle.  A running CPU is idle only if it is halted, has no
 * work of its own and halting is not handled in the kernel.
 */
static bool cpu_thread_is_idle(CPUState *cpu)
{
    bool needs_service = cpu->stop || cpu->queued_work_first;

    if (needs_service) {
        return false;
    }
    if (cpu->stopped || !runstate_is_running()) {
        return true;
    }
    /* Same short-circuit order as the negated form: halted, work, kernel.  */
    return cpu->halted && !qemu_cpu_has_work(cpu) && !kvm_halt_in_kernel();
}
79

    
80
static bool all_cpu_threads_idle(void)
81
{
82
    CPUArchState *env;
83

    
84
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
85
        if (!cpu_thread_is_idle(ENV_GET_CPU(env))) {
86
            return false;
87
        }
88
    }
89
    return true;
90
}
91

    
92
/***********************************************************/
93
/* guest cycle counter */
94

    
95
/* Conversion factor from emulated instructions to virtual clock ticks.  */
96
static int icount_time_shift;
97
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
98
#define MAX_ICOUNT_SHIFT 10
99
/* Compensate for varying guest execution speed.  */
100
static int64_t qemu_icount_bias;
101
static QEMUTimer *icount_rt_timer;
102
static QEMUTimer *icount_vm_timer;
103
static QEMUTimer *icount_warp_timer;
104
static int64_t vm_clock_warp_start;
105
static int64_t qemu_icount;
106

    
107
/* Bookkeeping behind cpu_get_ticks() and cpu_get_clock().  */
typedef struct TimersState {
    /* Last raw host tick value observed by cpu_get_ticks().  */
    int64_t cpu_ticks_prev;
    /* Added to the raw host ticks; holds the frozen value while disabled.  */
    int64_t cpu_ticks_offset;
    /* Added to get_clock(); holds the frozen value while disabled.  */
    int64_t cpu_clock_offset;
    /* Non-zero while the tick and clock sources are running.  */
    int32_t cpu_ticks_enabled;
    /* Not read by the code in view apart from its vmstate_timers entry.  */
    int64_t dummy;
} TimersState;

/* The single instance; registered for migration by configure_icount().  */
TimersState timers_state;
116

    
117
/* Return the virtual CPU time, based on the instruction counter.  */
118
int64_t cpu_get_icount(void)
119
{
120
    int64_t icount;
121
    CPUArchState *env = cpu_single_env;
122

    
123
    icount = qemu_icount;
124
    if (env) {
125
        if (!can_do_io(env)) {
126
            fprintf(stderr, "Bad clock read\n");
127
        }
128
        icount -= (env->icount_decr.u16.low + env->icount_extra);
129
    }
130
    return qemu_icount_bias + (icount << icount_time_shift);
131
}
132

    
133
/* return the host CPU cycle counter and handle stop/restart */
134
int64_t cpu_get_ticks(void)
135
{
136
    if (use_icount) {
137
        return cpu_get_icount();
138
    }
139
    if (!timers_state.cpu_ticks_enabled) {
140
        return timers_state.cpu_ticks_offset;
141
    } else {
142
        int64_t ticks;
143
        ticks = cpu_get_real_ticks();
144
        if (timers_state.cpu_ticks_prev > ticks) {
145
            /* Note: non increasing ticks may happen if the host uses
146
               software suspend */
147
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
148
        }
149
        timers_state.cpu_ticks_prev = ticks;
150
        return ticks + timers_state.cpu_ticks_offset;
151
    }
152
}
153

    
154
/* return the host CPU monotonic timer and handle stop/restart */
155
int64_t cpu_get_clock(void)
156
{
157
    int64_t ti;
158
    if (!timers_state.cpu_ticks_enabled) {
159
        return timers_state.cpu_clock_offset;
160
    } else {
161
        ti = get_clock();
162
        return ti + timers_state.cpu_clock_offset;
163
    }
164
}
165

    
166
/* enable cpu_get_ticks() */
167
void cpu_enable_ticks(void)
168
{
169
    if (!timers_state.cpu_ticks_enabled) {
170
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
171
        timers_state.cpu_clock_offset -= get_clock();
172
        timers_state.cpu_ticks_enabled = 1;
173
    }
174
}
175

    
176
/* disable cpu_get_ticks() : the clock is stopped. You must not call
177
   cpu_get_ticks() after that.  */
178
void cpu_disable_ticks(void)
179
{
180
    if (timers_state.cpu_ticks_enabled) {
181
        timers_state.cpu_ticks_offset = cpu_get_ticks();
182
        timers_state.cpu_clock_offset = cpu_get_clock();
183
        timers_state.cpu_ticks_enabled = 0;
184
    }
185
}
186

    
187
/* Correlation between real and virtual time is always going to be
188
   fairly approximate, so ignore small variation.
189
   When the guest is idle real and virtual time will be aligned in
190
   the IO wait loop.  */
191
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
192

    
193
static void icount_adjust(void)
194
{
195
    int64_t cur_time;
196
    int64_t cur_icount;
197
    int64_t delta;
198
    static int64_t last_delta;
199
    /* If the VM is not running, then do nothing.  */
200
    if (!runstate_is_running()) {
201
        return;
202
    }
203
    cur_time = cpu_get_clock();
204
    cur_icount = qemu_get_clock_ns(vm_clock);
205
    delta = cur_icount - cur_time;
206
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
207
    if (delta > 0
208
        && last_delta + ICOUNT_WOBBLE < delta * 2
209
        && icount_time_shift > 0) {
210
        /* The guest is getting too far ahead.  Slow time down.  */
211
        icount_time_shift--;
212
    }
213
    if (delta < 0
214
        && last_delta - ICOUNT_WOBBLE > delta * 2
215
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
216
        /* The guest is getting too far behind.  Speed time up.  */
217
        icount_time_shift++;
218
    }
219
    last_delta = delta;
220
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
221
}
222

    
223
static void icount_adjust_rt(void *opaque)
224
{
225
    qemu_mod_timer(icount_rt_timer,
226
                   qemu_get_clock_ms(rt_clock) + 1000);
227
    icount_adjust();
228
}
229

    
230
static void icount_adjust_vm(void *opaque)
231
{
232
    qemu_mod_timer(icount_vm_timer,
233
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
234
    icount_adjust();
235
}
236

    
237
static int64_t qemu_icount_round(int64_t count)
238
{
239
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
240
}
241

    
242
static void icount_warp_rt(void *opaque)
243
{
244
    if (vm_clock_warp_start == -1) {
245
        return;
246
    }
247

    
248
    if (runstate_is_running()) {
249
        int64_t clock = qemu_get_clock_ns(rt_clock);
250
        int64_t warp_delta = clock - vm_clock_warp_start;
251
        if (use_icount == 1) {
252
            qemu_icount_bias += warp_delta;
253
        } else {
254
            /*
255
             * In adaptive mode, do not let the vm_clock run too
256
             * far ahead of real time.
257
             */
258
            int64_t cur_time = cpu_get_clock();
259
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
260
            int64_t delta = cur_time - cur_icount;
261
            qemu_icount_bias += MIN(warp_delta, delta);
262
        }
263
        if (qemu_clock_expired(vm_clock)) {
264
            qemu_notify_event();
265
        }
266
    }
267
    vm_clock_warp_start = -1;
268
}
269

    
270
void qtest_clock_warp(int64_t dest)
271
{
272
    int64_t clock = qemu_get_clock_ns(vm_clock);
273
    assert(qtest_enabled());
274
    while (clock < dest) {
275
        int64_t deadline = qemu_clock_deadline(vm_clock);
276
        int64_t warp = MIN(dest - clock, deadline);
277
        qemu_icount_bias += warp;
278
        qemu_run_timers(vm_clock);
279
        clock = qemu_get_clock_ns(vm_clock);
280
    }
281
    qemu_notify_event();
282
}
283

    
284
/*
 * Arrange for vm_clock to keep advancing while all CPUs are idle in icount
 * mode.  Only meaningful for @clock == vm_clock with icount enabled; any
 * other call returns immediately.
 */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * Too many globals are involved to make warping work for other clocks;
     * taking the clock as an argument merely spares callers an if.
     */
    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
     * makes the deadline computed below correct, and synchronizes the insn
     * counter before the CPU starts running in case it was woken by
     * something other than the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);

    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline <= 0) {
        qemu_notify_event();
        return;
    }

    /*
     * Ensure the vm_clock proceeds even when the virtual CPU goes to
     * sleep.  Otherwise the CPU might wait for a future timer interrupt
     * that never arrives, because a vCPU that executes no insns does not
     * advance the vm_clock.
     *
     * An extreme solution would be to never let VCPUs sleep in icount mode
     * while a vm_clock timer is pending and simply jump to the next
     * vm_clock event.  Instead the VCPUs do stop, and vm_clock is advanced
     * only after some "real" time (related to the time left until the next
     * event) has passed; this rt_clock timer does that.  It keeps the
     * warps from being too visible externally---for example, network
     * packets are not sent continuously instead of every 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
}
339

    
340
static const VMStateDescription vmstate_timers = {
341
    .name = "timer",
342
    .version_id = 2,
343
    .minimum_version_id = 1,
344
    .minimum_version_id_old = 1,
345
    .fields      = (VMStateField[]) {
346
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
347
        VMSTATE_INT64(dummy, TimersState),
348
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
349
        VMSTATE_END_OF_LIST()
350
    }
351
};
352

    
353
void configure_icount(const char *option)
354
{
355
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
356
    if (!option) {
357
        return;
358
    }
359

    
360
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
361
    if (strcmp(option, "auto") != 0) {
362
        icount_time_shift = strtol(option, NULL, 0);
363
        use_icount = 1;
364
        return;
365
    }
366

    
367
    use_icount = 2;
368

    
369
    /* 125MIPS seems a reasonable initial guess at the guest speed.
370
       It will be corrected fairly quickly anyway.  */
371
    icount_time_shift = 3;
372

    
373
    /* Have both realtime and virtual time triggers for speed adjustment.
374
       The realtime trigger catches emulated time passing too slowly,
375
       the virtual time trigger catches emulated time passing too fast.
376
       Realtime triggers occur even when idle, so use them less frequently
377
       than VM triggers.  */
378
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
379
    qemu_mod_timer(icount_rt_timer,
380
                   qemu_get_clock_ms(rt_clock) + 1000);
381
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
382
    qemu_mod_timer(icount_vm_timer,
383
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
384
}
385

    
386
/***********************************************************/
387
void hw_error(const char *fmt, ...)
388
{
389
    va_list ap;
390
    CPUArchState *env;
391
    CPUState *cpu;
392

    
393
    va_start(ap, fmt);
394
    fprintf(stderr, "qemu: hardware error: ");
395
    vfprintf(stderr, fmt, ap);
396
    fprintf(stderr, "\n");
397
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
398
        cpu = ENV_GET_CPU(env);
399
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
400
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
401
    }
402
    va_end(ap);
403
    abort();
404
}
405

    
406
void cpu_synchronize_all_states(void)
407
{
408
    CPUArchState *env;
409

    
410
    for (env = first_cpu; env; env = env->next_cpu) {
411
        cpu_synchronize_state(ENV_GET_CPU(env));
412
    }
413
}
414

    
415
void cpu_synchronize_all_post_reset(void)
416
{
417
    CPUArchState *cpu;
418

    
419
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
420
        cpu_synchronize_post_reset(ENV_GET_CPU(cpu));
421
    }
422
}
423

    
424
void cpu_synchronize_all_post_init(void)
425
{
426
    CPUArchState *cpu;
427

    
428
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
429
        cpu_synchronize_post_init(ENV_GET_CPU(cpu));
430
    }
431
}
432

    
433
/* A CPU counts as stopped when the VM as a whole is not running or when
   its own "stopped" flag is set.  */
bool cpu_is_stopped(CPUState *cpu)
{
    if (!runstate_is_running()) {
        return true;
    }
    return cpu->stopped;
}
437

    
438
/*
 * Stop a running VM and enter @state.  Ticks are disabled and all vCPUs
 * paused before the run state changes; listeners are then notified, block
 * devices are drained and flushed, and a QEVENT_STOP monitor event is
 * emitted.  Does nothing if the VM is not running.
 */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
450

    
451
/* A CPU may execute guest code only if no stop is requested for it, it is
   not stopped, and the VM is running.  */
static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop || cpu->stopped || !runstate_is_running()) {
        return false;
    }
    return true;
}
461

    
462
/* React to a debug exit from @cpu: tell the gdbstub which CPU stopped,
   raise a system debug request, and flag the CPU as stopped.  */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();

    cpu->stopped = true;
}
468

    
469
static void cpu_signal(int sig)
470
{
471
    if (cpu_single_env) {
472
        cpu_exit(ENV_GET_CPU(cpu_single_env));
473
    }
474
    exit_request = 1;
475
}
476

    
477
#ifdef CONFIG_LINUX
478
static void sigbus_reraise(void)
479
{
480
    sigset_t set;
481
    struct sigaction action;
482

    
483
    memset(&action, 0, sizeof(action));
484
    action.sa_handler = SIG_DFL;
485
    if (!sigaction(SIGBUS, &action, NULL)) {
486
        raise(SIGBUS);
487
        sigemptyset(&set);
488
        sigaddset(&set, SIGBUS);
489
        sigprocmask(SIG_UNBLOCK, &set, NULL);
490
    }
491
    perror("Failed to re-raise SIGBUS!\n");
492
    abort();
493
}
494

    
495
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
496
                           void *ctx)
497
{
498
    if (kvm_on_sigbus(siginfo->ssi_code,
499
                      (void *)(intptr_t)siginfo->ssi_addr)) {
500
        sigbus_reraise();
501
    }
502
}
503

    
504
static void qemu_init_sigbus(void)
505
{
506
    struct sigaction action;
507

    
508
    memset(&action, 0, sizeof(action));
509
    action.sa_flags = SA_SIGINFO;
510
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
511
    sigaction(SIGBUS, &action, NULL);
512

    
513
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
514
}
515

    
516
/*
 * Consume pending SIG_IPI and SIGBUS signals for the calling thread
 * without blocking (zero timeout), repeating until neither is pending.
 *
 * A consumed SIGBUS is passed to kvm_on_sigbus_vcpu() for @cpu and is
 * re-raised if that returns non-zero.  Unexpected sigtimedwait() or
 * sigpending() failures are fatal.
 */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t wanted;
    sigset_t pending;
    int rc;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    do {
        rc = sigtimedwait(&wanted, &info, &no_wait);
        if (rc == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (rc == SIGBUS
            && kvm_on_sigbus_vcpu(cpu, info.si_code, info.si_addr)) {
            sigbus_reraise();
        }

        rc = sigpending(&pending);
        if (rc == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&pending, SIG_IPI) || sigismember(&pending, SIGBUS));
}
552

    
553
#else /* !CONFIG_LINUX */
554

    
555
/* Variant used when CONFIG_LINUX is not defined: nothing to set up.  */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
558

    
559
/* Variant used when CONFIG_LINUX is not defined: no signals are consumed
   and @cpu is not touched.  */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    /* intentionally empty */
}
562
#endif /* !CONFIG_LINUX */
563

    
564
#ifndef _WIN32
565
/* Do-nothing signal handler; installed for SIG_IPI by
   qemu_kvm_init_cpu_signals().  @sig is ignored.  */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
568

    
569
/*
 * Prepare signal handling for a KVM vCPU thread: install dummy_signal()
 * for SIG_IPI, then pass the thread's current signal mask with SIG_IPI
 * and SIGBUS removed to kvm_set_signal_mask() for @cpu.  A failure of
 * kvm_set_signal_mask() is fatal.
 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    struct sigaction ipi_action;
    sigset_t mask;
    int err;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* A NULL new set only queries the current mask.  */
    pthread_sigmask(SIG_BLOCK, NULL, &mask);
    sigdelset(&mask, SIG_IPI);
    sigdelset(&mask, SIGBUS);

    err = kvm_set_signal_mask(cpu, &mask);
    if (err) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-err));
        exit(1);
    }
}
588

    
589
static void qemu_tcg_init_cpu_signals(void)
590
{
591
    sigset_t set;
592
    struct sigaction sigact;
593

    
594
    memset(&sigact, 0, sizeof(sigact));
595
    sigact.sa_handler = cpu_signal;
596
    sigaction(SIG_IPI, &sigact, NULL);
597

    
598
    sigemptyset(&set);
599
    sigaddset(&set, SIG_IPI);
600
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
601
}
602

    
603
#else /* _WIN32 */
604
/* _WIN32 variant: unconditionally aborts; @cpu is not used.  */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
608

    
609
/* _WIN32 variant: no signal setup is performed.  */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
612
#endif /* _WIN32 */
613

    
614
static QemuMutex qemu_global_mutex;
615
static QemuCond qemu_io_proceeded_cond;
616
static bool iothread_requesting_mutex;
617

    
618
static QemuThread io_thread;
619

    
620
static QemuThread *tcg_cpu_thread;
621
static QemuCond *tcg_halt_cond;
622

    
623
/* cpu creation */
624
static QemuCond qemu_cpu_cond;
625
/* system init */
626
static QemuCond qemu_pause_cond;
627
static QemuCond qemu_work_cond;
628

    
629
void qemu_init_cpu_loop(void)
630
{
631
    qemu_init_sigbus();
632
    qemu_cond_init(&qemu_cpu_cond);
633
    qemu_cond_init(&qemu_pause_cond);
634
    qemu_cond_init(&qemu_work_cond);
635
    qemu_cond_init(&qemu_io_proceeded_cond);
636
    qemu_mutex_init(&qemu_global_mutex);
637

    
638
    qemu_thread_get_self(&io_thread);
639
}
640

    
641
/*
 * Run @func(@data) in the context of @cpu's thread and wait for it.
 *
 * If the caller already is that thread, @func runs directly.  Otherwise a
 * work item living on the caller's stack is appended to the CPU's queue,
 * the CPU is kicked, and the caller waits on qemu_work_cond (releasing
 * qemu_global_mutex while waiting) until the item is marked done.
 * cpu_single_env is saved and restored around each wait.
 */
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item item;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    item.func = func;
    item.data = data;
    if (cpu->queued_work_first != NULL) {
        cpu->queued_work_last->next = &item;
    } else {
        cpu->queued_work_first = &item;
    }
    cpu->queued_work_last = &item;
    item.next = NULL;
    item.done = false;

    qemu_cpu_kick(cpu);
    while (!item.done) {
        CPUArchState *saved_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
669

    
670
/*
 * Run every work item queued on @cpu, in order, marking each as done.
 * Afterwards the queue tail is cleared and waiters on qemu_work_cond are
 * woken.  Returns immediately, without a broadcast, when the queue is
 * empty.
 */
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *item = cpu->queued_work_first;

    if (item == NULL) {
        return;
    }

    /* The head is re-read each round, so the item is unlinked before its
       function runs and is not touched again once marked done.  */
    for (; item != NULL; item = cpu->queued_work_first) {
        cpu->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
686

    
687
/* Housekeeping shared by the vCPU wait loops: turn a pending stop request
   into the stopped state (signalling qemu_pause_cond), run queued work,
   and clear the thread_kicked flag.  */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    bool stop_requested = cpu->stop;

    if (stop_requested) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
697

    
698
static void qemu_tcg_wait_io_event(void)
699
{
700
    CPUArchState *env;
701

    
702
    while (all_cpu_threads_idle()) {
703
       /* Start accounting real time to the virtual clock if the CPUs
704
          are idle.  */
705
        qemu_clock_warp(vm_clock);
706
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
707
    }
708

    
709
    while (iothread_requesting_mutex) {
710
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
711
    }
712

    
713
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
714
        qemu_wait_io_event_common(ENV_GET_CPU(env));
715
    }
716
}
717

    
718
/* Wait step of a KVM vCPU thread: sleep on the CPU's halt condition for
   as long as the thread is idle, then consume pending signals and do the
   common housekeeping.  */
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    for (; cpu_thread_is_idle(cpu); ) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
727

    
728
static void *qemu_kvm_cpu_thread_fn(void *arg)
729
{
730
    CPUState *cpu = arg;
731
    int r;
732

    
733
    qemu_mutex_lock(&qemu_global_mutex);
734
    qemu_thread_get_self(cpu->thread);
735
    cpu->thread_id = qemu_get_thread_id();
736
    cpu_single_env = cpu->env_ptr;
737

    
738
    r = kvm_init_vcpu(cpu);
739
    if (r < 0) {
740
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
741
        exit(1);
742
    }
743

    
744
    qemu_kvm_init_cpu_signals(cpu);
745

    
746
    /* signal CPU creation */
747
    cpu->created = true;
748
    qemu_cond_signal(&qemu_cpu_cond);
749

    
750
    while (1) {
751
        if (cpu_can_run(cpu)) {
752
            r = kvm_cpu_exec(cpu);
753
            if (r == EXCP_DEBUG) {
754
                cpu_handle_guest_debug(cpu);
755
            }
756
        }
757
        qemu_kvm_wait_io_event(cpu);
758
    }
759

    
760
    return NULL;
761
}
762

    
763
static void *qemu_dummy_cpu_thread_fn(void *arg)
764
{
765
#ifdef _WIN32
766
    fprintf(stderr, "qtest is not supported under Windows\n");
767
    exit(1);
768
#else
769
    CPUState *cpu = arg;
770
    sigset_t waitset;
771
    int r;
772

    
773
    qemu_mutex_lock_iothread();
774
    qemu_thread_get_self(cpu->thread);
775
    cpu->thread_id = qemu_get_thread_id();
776

    
777
    sigemptyset(&waitset);
778
    sigaddset(&waitset, SIG_IPI);
779

    
780
    /* signal CPU creation */
781
    cpu->created = true;
782
    qemu_cond_signal(&qemu_cpu_cond);
783

    
784
    cpu_single_env = cpu->env_ptr;
785
    while (1) {
786
        cpu_single_env = NULL;
787
        qemu_mutex_unlock_iothread();
788
        do {
789
            int sig;
790
            r = sigwait(&waitset, &sig);
791
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
792
        if (r == -1) {
793
            perror("sigwait");
794
            exit(1);
795
        }
796
        qemu_mutex_lock_iothread();
797
        cpu_single_env = cpu->env_ptr;
798
        qemu_wait_io_event_common(cpu);
799
    }
800

    
801
    return NULL;
802
#endif
803
}
804

    
805
/* Forward declaration: called from qemu_tcg_cpu_thread_fn() below.  */
static void tcg_exec_all(void);
806

    
807
/* Per-CPU callback passed to qemu_for_each_cpu() by the TCG thread: mark
   @cpu as created and record the calling thread's id in it.  @data is
   unused.  */
static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
{
    int tid = qemu_get_thread_id();

    cpu->thread_id = tid;
    cpu->created = true;
}
812

    
813
static void *qemu_tcg_cpu_thread_fn(void *arg)
814
{
815
    CPUState *cpu = arg;
816
    CPUArchState *env;
817

    
818
    qemu_tcg_init_cpu_signals();
819
    qemu_thread_get_self(cpu->thread);
820

    
821
    qemu_mutex_lock(&qemu_global_mutex);
822
    qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
823
    qemu_cond_signal(&qemu_cpu_cond);
824

    
825
    /* wait for initial kick-off after machine start */
826
    while (ENV_GET_CPU(first_cpu)->stopped) {
827
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
828

    
829
        /* process any pending work */
830
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
831
            qemu_wait_io_event_common(ENV_GET_CPU(env));
832
        }
833
    }
834

    
835
    while (1) {
836
        tcg_exec_all();
837
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
838
            qemu_notify_event();
839
        }
840
        qemu_tcg_wait_io_event();
841
    }
842

    
843
    return NULL;
844
}
845

    
846
/*
 * Interrupt the thread serving @cpu.
 *
 * POSIX hosts: send SIG_IPI with pthread_kill(); failure is fatal.
 * Windows hosts: unless called from the target thread itself, suspend the
 * thread, call cpu_signal(0) on its behalf and resume it; a failure to
 * suspend or resume is fatal.
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        /* Terminate the line so the message is not glued to later output.  */
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         *
         * NOTE(review): the loop repeats while GetThreadContext() returns
         * non-zero; confirm against the Win32 documentation that this is
         * the intended polarity.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
884

    
885
/* Wake @cpu: broadcast its halt condition and, when TCG is not in use,
   also interrupt its thread unless a kick is already outstanding
   (thread_kicked).  */
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);

    if (tcg_enabled() || cpu->thread_kicked) {
        return;
    }
    qemu_cpu_kick_thread(cpu);
    cpu->thread_kicked = true;
}
893

    
894
void qemu_cpu_kick_self(void)
895
{
896
#ifndef _WIN32
897
    assert(cpu_single_env);
898
    CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
899

    
900
    if (!cpu_single_cpu->thread_kicked) {
901
        qemu_cpu_kick_thread(cpu_single_cpu);
902
        cpu_single_cpu->thread_kicked = true;
903
    }
904
#else
905
    abort();
906
#endif
907
}
908

    
909
/* Return whether the calling thread is the thread recorded for @cpu.  */
bool qemu_cpu_is_self(CPUState *cpu)
{
    bool is_self = qemu_thread_is_self(cpu->thread);

    return is_self;
}
913

    
914
static bool qemu_in_vcpu_thread(void)
915
{
916
    return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
917
}
918

    
919
void qemu_mutex_lock_iothread(void)
920
{
921
    if (!tcg_enabled()) {
922
        qemu_mutex_lock(&qemu_global_mutex);
923
    } else {
924
        iothread_requesting_mutex = true;
925
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
926
            qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
927
            qemu_mutex_lock(&qemu_global_mutex);
928
        }
929
        iothread_requesting_mutex = false;
930
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
931
    }
932
}
933

    
934
void qemu_mutex_unlock_iothread(void)
935
{
936
    qemu_mutex_unlock(&qemu_global_mutex);
937
}
938

    
939
static int all_vcpus_paused(void)
940
{
941
    CPUArchState *penv = first_cpu;
942

    
943
    while (penv) {
944
        CPUState *pcpu = ENV_GET_CPU(penv);
945
        if (!pcpu->stopped) {
946
            return 0;
947
        }
948
        penv = penv->next_cpu;
949
    }
950

    
951
    return 1;
952
}
953

    
954
void pause_all_vcpus(void)
955
{
956
    CPUArchState *penv = first_cpu;
957

    
958
    qemu_clock_enable(vm_clock, false);
959
    while (penv) {
960
        CPUState *pcpu = ENV_GET_CPU(penv);
961
        pcpu->stop = true;
962
        qemu_cpu_kick(pcpu);
963
        penv = penv->next_cpu;
964
    }
965

    
966
    if (qemu_in_vcpu_thread()) {
967
        cpu_stop_current();
968
        if (!kvm_enabled()) {
969
            penv = first_cpu;
970
            while (penv) {
971
                CPUState *pcpu = ENV_GET_CPU(penv);
972
                pcpu->stop = false;
973
                pcpu->stopped = true;
974
                penv = penv->next_cpu;
975
            }
976
            return;
977
        }
978
    }
979

    
980
    while (!all_vcpus_paused()) {
981
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
982
        penv = first_cpu;
983
        while (penv) {
984
            qemu_cpu_kick(ENV_GET_CPU(penv));
985
            penv = penv->next_cpu;
986
        }
987
    }
988
}
989

    
990
/* Let @cpu run again: clear its stop request and stopped state, then
   kick it.  */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;

    qemu_cpu_kick(cpu);
}
996

    
997
void resume_all_vcpus(void)
998
{
999
    CPUArchState *penv = first_cpu;
1000

    
1001
    qemu_clock_enable(vm_clock, true);
1002
    while (penv) {
1003
        CPUState *pcpu = ENV_GET_CPU(penv);
1004
        cpu_resume(pcpu);
1005
        penv = penv->next_cpu;
1006
    }
1007
}
1008

    
1009
/*
 * Attach @cpu to the TCG thread.  All CPUs share one thread and one halt
 * condition under TCG: the first call creates both and waits on
 * qemu_cpu_cond until the new thread has marked the CPU as created; later
 * calls just reuse them.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    if (tcg_cpu_thread) {
        /* The shared thread already exists.  */
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
        return;
    }

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    tcg_halt_cond = cpu->halt_cond;
    qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
    tcg_cpu_thread = cpu->thread;
}
1031

    
1032
/* Allocate the thread and halt condition for @cpu, start its KVM thread
   function, and wait on qemu_cpu_cond until the thread has marked the CPU
   as created.  */
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    for (; !cpu->created; ) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1043

    
1044
/*
 * Allocate the thread and halt condition for @cpu, start
 * qemu_dummy_cpu_thread_fn on it, and wait on qemu_cpu_cond (with
 * qemu_global_mutex) until cpu->created becomes true.
 */
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    QemuThread *thread = g_malloc0(sizeof(QemuThread));
    QemuCond *halt_cond;

    cpu->thread = thread;
    halt_cond = g_malloc0(sizeof(QemuCond));
    cpu->halt_cond = halt_cond;
    qemu_cond_init(halt_cond);

    qemu_thread_create(thread, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);

    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1055

    
1056
/*
 * Initialise the topology fields of @cpu from smp_cores/smp_threads,
 * mark it stopped, and start it with the helper matching the active
 * accelerator: KVM if enabled, else TCG if enabled, else the dummy one.
 */
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
        return;
    }
    if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
        return;
    }
    qemu_dummy_start_vcpu(cpu);
}
1069

    
1070
void cpu_stop_current(void)
1071
{
1072
    if (cpu_single_env) {
1073
        CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
1074
        cpu_single_cpu->stop = false;
1075
        cpu_single_cpu->stopped = true;
1076
        cpu_exit(cpu_single_cpu);
1077
        qemu_cond_signal(&qemu_pause_cond);
1078
    }
1079
}
1080

    
1081
/*
 * Stop the VM and move to run state @state.
 *
 * Outside a vCPU thread the stop is carried out directly through
 * do_vm_stop().  Inside a vCPU thread it is only requested via
 * qemu_system_vmstop_request() and the current CPU is stopped.
 */
void vm_stop(RunState state)
{
    if (!qemu_in_vcpu_thread()) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
1094

    
1095
/* does a state transition even if the VM is already stopped,
1096
   current state is forgotten forever */
1097
void vm_stop_force_state(RunState state)
1098
{
1099
    if (runstate_is_running()) {
1100
        vm_stop(state);
1101
    } else {
1102
        runstate_set(state);
1103
    }
1104
}
1105

    
1106
/*
 * Run cpu_exec() for @env and return its result.
 *
 * When use_icount is set, the instruction budget is recomputed from the
 * vm_clock deadline before execution (at most 0xffff goes into
 * icount_decr.u16.low, the remainder into icount_extra), and whatever
 * is left over afterwards is subtracted from qemu_icount again.
 * With CONFIG_PROFILER the time spent in cpu_exec() is added to
 * qemu_time.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t profile_start = profile_getclock();
#endif

    if (use_icount) {
        int64_t budget;
        int low_part;

        /* Take back what the previous slice left unexecuted. */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        budget = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += budget;

        /* The 16-bit decrementer takes at most 0xffff of the budget. */
        if (budget > 0xffff) {
            low_part = 0xffff;
        } else {
            low_part = budget;
        }
        env->icount_decr.u16.low = low_part;
        env->icount_extra = budget - low_part;
    }

    ret = cpu_exec(env);

#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - profile_start;
#endif

    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }

    return ret;
}
1143

    
1144
static void tcg_exec_all(void)
1145
{
1146
    int r;
1147

    
1148
    /* Account partial waits to the vm_clock.  */
1149
    qemu_clock_warp(vm_clock);
1150

    
1151
    if (next_cpu == NULL) {
1152
        next_cpu = first_cpu;
1153
    }
1154
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1155
        CPUArchState *env = next_cpu;
1156
        CPUState *cpu = ENV_GET_CPU(env);
1157

    
1158
        qemu_clock_enable(vm_clock,
1159
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1160

    
1161
        if (cpu_can_run(cpu)) {
1162
            r = tcg_cpu_exec(env);
1163
            if (r == EXCP_DEBUG) {
1164
                cpu_handle_guest_debug(cpu);
1165
                break;
1166
            }
1167
        } else if (cpu->stop || cpu->stopped) {
1168
            break;
1169
        }
1170
    }
1171
    exit_request = 0;
1172
}
1173

    
1174
void set_numa_modes(void)
1175
{
1176
    CPUArchState *env;
1177
    CPUState *cpu;
1178
    int i;
1179

    
1180
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1181
        cpu = ENV_GET_CPU(env);
1182
        for (i = 0; i < nb_numa_nodes; i++) {
1183
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1184
                cpu->numa_node = i;
1185
            }
1186
        }
1187
    }
1188
}
1189

    
1190
/*
 * Print the list of CPU models to @f through @cpu_fprintf, using the
 * target's cpu_list macro when it is defined.  @optarg is not used.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: targets that still lack xxx_cpu_list print nothing here */
#ifdef cpu_list
    cpu_list(f, cpu_fprintf);
#endif
}
1197

    
1198
/*
 * Build and return a newly allocated CpuInfoList with one entry per CPU
 * in the first_cpu list, in list order.  Each entry carries the CPU
 * index, whether it is first_cpu, its halted flag and thread id, plus a
 * target-specific program counter field where one is available.
 * Returns NULL when the CPU list is empty.  @errp is not written.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CPUState *cpu = ENV_GET_CPU(env);
        CpuInfoList *entry;

        cpu_synchronize_state(cpu);

        entry = g_malloc0(sizeof(*entry));
        entry->value = g_malloc0(sizeof(*entry->value));
        entry->value->CPU = cpu->cpu_index;
        entry->value->current = (env == first_cpu);
        entry->value->halted = cpu->halted;
        entry->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        entry->value->has_pc = true;
        entry->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        entry->value->has_nip = true;
        entry->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        entry->value->has_pc = true;
        entry->value->pc = env->pc;
        entry->value->has_npc = true;
        entry->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        entry->value->has_PC = true;
        entry->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        /* Append at the tail; entry->next is already NULL (g_malloc0). */
        *tail = entry;
        tail = &entry->next;
    }

    return head;
}
1242

    
1243
/*
 * QMP 'memsave': write @size bytes of guest virtual memory starting at
 * @addr, as seen by the selected CPU, to the file @filename.
 *
 * @addr:      guest virtual start address
 * @size:      number of bytes to save; must not be negative
 * @filename:  output file, opened with mode "wb"
 * @has_cpu:   whether @cpu_index was supplied; CPU 0 is used otherwise
 * @cpu_index: index of the CPU whose address space is read
 * @errp:      set on an invalid CPU index, a negative size, a failure
 *             to open the file, a failed guest memory read or a short
 *             write
 *
 * The data is copied in chunks of at most sizeof(buf) bytes.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }
    env = cpu->env_ptr;

    /*
     * A negative size would be truncated to a huge uint32_t chunk
     * length below and make fwrite() read far past the end of buf.
     * Reject it before the output file is created.
     */
    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        /* Do not write buf to the file when the guest read failed. */
        if (cpu_memory_rw_debug(env, addr, buf, l, 0) != 0) {
            error_set(errp, QERR_INVALID_PARAMETER_VALUE, "val",
                      "an address range mapped in the guest");
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
1286

    
1287
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1288
                  Error **errp)
1289
{
1290
    FILE *f;
1291
    uint32_t l;
1292
    uint8_t buf[1024];
1293

    
1294
    f = fopen(filename, "wb");
1295
    if (!f) {
1296
        error_setg_file_open(errp, errno, filename);
1297
        return;
1298
    }
1299

    
1300
    while (size != 0) {
1301
        l = sizeof(buf);
1302
        if (l > size)
1303
            l = size;
1304
        cpu_physical_memory_rw(addr, buf, l, 0);
1305
        if (fwrite(buf, 1, l, f) != l) {
1306
            error_set(errp, QERR_IO_ERROR);
1307
            goto exit;
1308
        }
1309
        addr += l;
1310
        size -= l;
1311
    }
1312

    
1313
exit:
1314
    fclose(f);
1315
}
1316

    
1317
/*
 * QMP 'inject-nmi': deliver an NMI to every CPU in the first_cpu list.
 *
 * Only implemented for TARGET_I386: a CPU with an APIC state gets the
 * NMI through apic_deliver_nmi(), any other CPU through
 * cpu_interrupt() with CPU_INTERRUPT_NMI.  On all other targets @errp
 * is set to QERR_UNSUPPORTED.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        if (env->apic_state) {
            apic_deliver_nmi(env->apic_state);
        } else {
            cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
        }
        env = env->next_cpu;
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}