Statistics
| Branch: | Revision:

root / cpus.c @ 09f1bbcd

History | View | Annotate | Download (32.7 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
/* Needed early for CONFIG_BSD etc. */
26
#include "config-host.h"
27

    
28
#include "monitor.h"
29
#include "sysemu.h"
30
#include "gdbstub.h"
31
#include "dma.h"
32
#include "kvm.h"
33
#include "qmp-commands.h"
34

    
35
#include "qemu-thread.h"
36
#include "cpus.h"
37
#include "qtest.h"
38
#include "main-loop.h"
39

    
40
#ifndef _WIN32
41
#include "compatfd.h"
42
#endif
43

    
44
#ifdef CONFIG_LINUX
45

    
46
#include <sys/prctl.h>
47

    
48
#ifndef PR_MCE_KILL
49
#define PR_MCE_KILL 33
50
#endif
51

    
52
#ifndef PR_MCE_KILL_SET
53
#define PR_MCE_KILL_SET 1
54
#endif
55

    
56
#ifndef PR_MCE_KILL_EARLY
57
#define PR_MCE_KILL_EARLY 1
58
#endif
59

    
60
#endif /* CONFIG_LINUX */
61

    
62
/* NOTE(review): not referenced in the part of the file visible here;
   presumably the CPU from which the TCG execution loop resumes -- confirm.  */
static CPUArchState *next_cpu;
63

    
64
/***********************************************************/
65
/* guest cycle counter */
66

    
67
/* Conversion factor from emulated instructions to virtual clock ticks.  */
68
static int icount_time_shift;
69
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
70
#define MAX_ICOUNT_SHIFT 10
71
/* Compensate for varying guest execution speed.  */
72
static int64_t qemu_icount_bias;
73
static QEMUTimer *icount_rt_timer;
74
static QEMUTimer *icount_vm_timer;
75
static QEMUTimer *icount_warp_timer;
76
static int64_t vm_clock_warp_start;
77
static int64_t qemu_icount;
78

    
79
/* Bookkeeping behind cpu_get_ticks() and cpu_get_clock().  */
typedef struct TimersState TimersState;

struct TimersState {
    int64_t cpu_ticks_prev;     /* last raw value read by cpu_get_ticks() */
    int64_t cpu_ticks_offset;   /* added to the raw host tick counter */
    int64_t cpu_clock_offset;   /* added to get_clock() */
    int32_t cpu_ticks_enabled;  /* non-zero while the counters are running */
    int64_t dummy;              /* only listed in the migration fields */
};

TimersState timers_state;
88

    
89
/* Return the virtual CPU time, based on the instruction counter.  */
90
int64_t cpu_get_icount(void)
91
{
92
    int64_t icount;
93
    CPUArchState *env = cpu_single_env;
94

    
95
    icount = qemu_icount;
96
    if (env) {
97
        if (!can_do_io(env)) {
98
            fprintf(stderr, "Bad clock read\n");
99
        }
100
        icount -= (env->icount_decr.u16.low + env->icount_extra);
101
    }
102
    return qemu_icount_bias + (icount << icount_time_shift);
103
}
104

    
105
/* return the host CPU cycle counter and handle stop/restart */
106
int64_t cpu_get_ticks(void)
107
{
108
    if (use_icount) {
109
        return cpu_get_icount();
110
    }
111
    if (!timers_state.cpu_ticks_enabled) {
112
        return timers_state.cpu_ticks_offset;
113
    } else {
114
        int64_t ticks;
115
        ticks = cpu_get_real_ticks();
116
        if (timers_state.cpu_ticks_prev > ticks) {
117
            /* Note: non increasing ticks may happen if the host uses
118
               software suspend */
119
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
120
        }
121
        timers_state.cpu_ticks_prev = ticks;
122
        return ticks + timers_state.cpu_ticks_offset;
123
    }
124
}
125

    
126
/* return the host CPU monotonic timer and handle stop/restart */
127
int64_t cpu_get_clock(void)
128
{
129
    int64_t ti;
130
    if (!timers_state.cpu_ticks_enabled) {
131
        return timers_state.cpu_clock_offset;
132
    } else {
133
        ti = get_clock();
134
        return ti + timers_state.cpu_clock_offset;
135
    }
136
}
137

    
138
/* enable cpu_get_ticks() */
139
void cpu_enable_ticks(void)
140
{
141
    if (!timers_state.cpu_ticks_enabled) {
142
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
143
        timers_state.cpu_clock_offset -= get_clock();
144
        timers_state.cpu_ticks_enabled = 1;
145
    }
146
}
147

    
148
/* disable cpu_get_ticks() : the clock is stopped. You must not call
149
   cpu_get_ticks() after that.  */
150
void cpu_disable_ticks(void)
151
{
152
    if (timers_state.cpu_ticks_enabled) {
153
        timers_state.cpu_ticks_offset = cpu_get_ticks();
154
        timers_state.cpu_clock_offset = cpu_get_clock();
155
        timers_state.cpu_ticks_enabled = 0;
156
    }
157
}
158

    
159
/* Correlation between real and virtual time is always going to be
160
   fairly approximate, so ignore small variation.
161
   When the guest is idle real and virtual time will be aligned in
162
   the IO wait loop.  */
163
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
164

    
165
static void icount_adjust(void)
166
{
167
    int64_t cur_time;
168
    int64_t cur_icount;
169
    int64_t delta;
170
    static int64_t last_delta;
171
    /* If the VM is not running, then do nothing.  */
172
    if (!runstate_is_running()) {
173
        return;
174
    }
175
    cur_time = cpu_get_clock();
176
    cur_icount = qemu_get_clock_ns(vm_clock);
177
    delta = cur_icount - cur_time;
178
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
179
    if (delta > 0
180
        && last_delta + ICOUNT_WOBBLE < delta * 2
181
        && icount_time_shift > 0) {
182
        /* The guest is getting too far ahead.  Slow time down.  */
183
        icount_time_shift--;
184
    }
185
    if (delta < 0
186
        && last_delta - ICOUNT_WOBBLE > delta * 2
187
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
188
        /* The guest is getting too far behind.  Speed time up.  */
189
        icount_time_shift++;
190
    }
191
    last_delta = delta;
192
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
193
}
194

    
195
static void icount_adjust_rt(void *opaque)
196
{
197
    qemu_mod_timer(icount_rt_timer,
198
                   qemu_get_clock_ms(rt_clock) + 1000);
199
    icount_adjust();
200
}
201

    
202
static void icount_adjust_vm(void *opaque)
203
{
204
    qemu_mod_timer(icount_vm_timer,
205
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
206
    icount_adjust();
207
}
208

    
209
static int64_t qemu_icount_round(int64_t count)
210
{
211
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
212
}
213

    
214
static void icount_warp_rt(void *opaque)
215
{
216
    if (vm_clock_warp_start == -1) {
217
        return;
218
    }
219

    
220
    if (runstate_is_running()) {
221
        int64_t clock = qemu_get_clock_ns(rt_clock);
222
        int64_t warp_delta = clock - vm_clock_warp_start;
223
        if (use_icount == 1) {
224
            qemu_icount_bias += warp_delta;
225
        } else {
226
            /*
227
             * In adaptive mode, do not let the vm_clock run too
228
             * far ahead of real time.
229
             */
230
            int64_t cur_time = cpu_get_clock();
231
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
232
            int64_t delta = cur_time - cur_icount;
233
            qemu_icount_bias += MIN(warp_delta, delta);
234
        }
235
        if (qemu_clock_expired(vm_clock)) {
236
            qemu_notify_event();
237
        }
238
    }
239
    vm_clock_warp_start = -1;
240
}
241

    
242
void qtest_clock_warp(int64_t dest)
243
{
244
    int64_t clock = qemu_get_clock_ns(vm_clock);
245
    assert(qtest_enabled());
246
    while (clock < dest) {
247
        int64_t deadline = qemu_clock_deadline(vm_clock);
248
        int64_t warp = MIN(dest - clock, deadline);
249
        qemu_icount_bias += warp;
250
        qemu_run_timers(vm_clock);
251
        clock = qemu_get_clock_ns(vm_clock);
252
    }
253
    qemu_notify_event();
254
}
255

    
256
/* Arrange for vm_clock to keep advancing while every vCPU is idle in
   icount mode.  A no-op for any other clock or when icount is off.  */
void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * The warp machinery relies on too many globals to be applied to
     * other clocks; taking a clock argument merely spares callers an
     * if statement.
     */
    if (!(clock == vm_clock && use_icount)) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now so
     * the deadline computed below is correct.  This also synchronizes
     * the insn counter before the CPU starts running, in case it is
     * woken by something other than the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);
    if (!(all_cpu_threads_idle() && qemu_clock_has_timers(vm_clock))) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    /* When testing, qtest commands advance icount.  */
    if (qtest_enabled()) {
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline <= 0) {
        qemu_notify_event();
        return;
    }

    /*
     * Ensure the vm_clock proceeds even when the virtual CPU goes to
     * sleep.  Otherwise, the CPU might be waiting for a future timer
     * interrupt to wake it up, but the interrupt never comes because
     * the vCPU isn't running any insns and thus doesn't advance the
     * vm_clock.
     *
     * An extreme solution would be to never let VCPUs sleep in icount
     * mode while a vm_clock timer is pending and simply jump time to the
     * next vm_clock event.  Instead, VCPUs do stop and vm_clock only
     * advances after some "real" time (related to the time left until
     * the next event) has passed; this rt_clock timer does that.  It
     * keeps warps from being too visible externally---for example, you
     * will not be sending network packets continuously instead of every
     * 100ms.
     */
    qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
}
311

    
312
static const VMStateDescription vmstate_timers = {
313
    .name = "timer",
314
    .version_id = 2,
315
    .minimum_version_id = 1,
316
    .minimum_version_id_old = 1,
317
    .fields      = (VMStateField[]) {
318
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
319
        VMSTATE_INT64(dummy, TimersState),
320
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
321
        VMSTATE_END_OF_LIST()
322
    }
323
};
324

    
325
void configure_icount(const char *option)
326
{
327
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
328
    if (!option) {
329
        return;
330
    }
331

    
332
    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
333
    if (strcmp(option, "auto") != 0) {
334
        icount_time_shift = strtol(option, NULL, 0);
335
        use_icount = 1;
336
        return;
337
    }
338

    
339
    use_icount = 2;
340

    
341
    /* 125MIPS seems a reasonable initial guess at the guest speed.
342
       It will be corrected fairly quickly anyway.  */
343
    icount_time_shift = 3;
344

    
345
    /* Have both realtime and virtual time triggers for speed adjustment.
346
       The realtime trigger catches emulated time passing too slowly,
347
       the virtual time trigger catches emulated time passing too fast.
348
       Realtime triggers occur even when idle, so use them less frequently
349
       than VM triggers.  */
350
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
351
    qemu_mod_timer(icount_rt_timer,
352
                   qemu_get_clock_ms(rt_clock) + 1000);
353
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
354
    qemu_mod_timer(icount_vm_timer,
355
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
356
}
357

    
358
/***********************************************************/
359
void hw_error(const char *fmt, ...)
360
{
361
    va_list ap;
362
    CPUArchState *env;
363

    
364
    va_start(ap, fmt);
365
    fprintf(stderr, "qemu: hardware error: ");
366
    vfprintf(stderr, fmt, ap);
367
    fprintf(stderr, "\n");
368
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
369
        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
370
#ifdef TARGET_I386
371
        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
372
#else
373
        cpu_dump_state(env, stderr, fprintf, 0);
374
#endif
375
    }
376
    va_end(ap);
377
    abort();
378
}
379

    
380
void cpu_synchronize_all_states(void)
381
{
382
    CPUArchState *cpu;
383

    
384
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
385
        cpu_synchronize_state(cpu);
386
    }
387
}
388

    
389
void cpu_synchronize_all_post_reset(void)
390
{
391
    CPUArchState *cpu;
392

    
393
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
394
        cpu_synchronize_post_reset(cpu);
395
    }
396
}
397

    
398
void cpu_synchronize_all_post_init(void)
399
{
400
    CPUArchState *cpu;
401

    
402
    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
403
        cpu_synchronize_post_init(cpu);
404
    }
405
}
406

    
407
/* Return 1 if the VM is not running or @env is flagged stopped,
   0 otherwise.  */
int cpu_is_stopped(CPUArchState *env)
{
    if (!runstate_is_running()) {
        return 1;
    }
    return env->stopped != 0;
}
411

    
412
/* Stop a running VM and enter run state @state: freeze the tick
   counters, pause all vCPUs, switch state and notify listeners, drain
   and flush block devices, and emit the STOP monitor event.
   Does nothing if the VM is not running.  */
static void do_vm_stop(RunState state)
{
    if (!runstate_is_running()) {
        return;
    }

    cpu_disable_ticks();
    pause_all_vcpus();
    runstate_set(state);
    vm_state_notify(0, state);
    bdrv_drain_all();
    bdrv_flush_all();
    monitor_protocol_event(QEVENT_STOP, NULL);
}
424

    
425
/* Return 1 when @env may execute guest code: no stop request pending,
   not stopped, and the VM is running.  Return 0 otherwise.  */
static int cpu_can_run(CPUArchState *env)
{
    return !(env->stop || env->stopped || !runstate_is_running());
}
435

    
436
/* Tell whether the thread driving @env has nothing to do.
   A pending stop request or queued work always makes it busy; otherwise a
   stopped CPU (or a stopped VM) is idle; otherwise it is idle only when
   halted, without work, and the KVM irqchip is not in the kernel.  */
static bool cpu_thread_is_idle(CPUArchState *env)
{
    bool busy = env->stop || env->queued_work_first;

    if (busy) {
        return false;
    }
    if (env->stopped || !runstate_is_running()) {
        return true;
    }
    return env->halted && !qemu_cpu_has_work(env) && !kvm_irqchip_in_kernel();
}
449

    
450
bool all_cpu_threads_idle(void)
451
{
452
    CPUArchState *env;
453

    
454
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
455
        if (!cpu_thread_is_idle(env)) {
456
            return false;
457
        }
458
    }
459
    return true;
460
}
461

    
462
/* React to a guest debug exception on @env: record it as the CPU that
   stopped for gdb, raise a system debug request and mark it stopped.  */
static void cpu_handle_guest_debug(CPUArchState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();

    env->stopped = 1;
}
468

    
469
static void cpu_signal(int sig)
470
{
471
    if (cpu_single_env) {
472
        cpu_exit(cpu_single_env);
473
    }
474
    exit_request = 1;
475
}
476

    
477
#ifdef CONFIG_LINUX
478
static void sigbus_reraise(void)
479
{
480
    sigset_t set;
481
    struct sigaction action;
482

    
483
    memset(&action, 0, sizeof(action));
484
    action.sa_handler = SIG_DFL;
485
    if (!sigaction(SIGBUS, &action, NULL)) {
486
        raise(SIGBUS);
487
        sigemptyset(&set);
488
        sigaddset(&set, SIGBUS);
489
        sigprocmask(SIG_UNBLOCK, &set, NULL);
490
    }
491
    perror("Failed to re-raise SIGBUS!\n");
492
    abort();
493
}
494

    
495
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
496
                           void *ctx)
497
{
498
    if (kvm_on_sigbus(siginfo->ssi_code,
499
                      (void *)(intptr_t)siginfo->ssi_addr)) {
500
        sigbus_reraise();
501
    }
502
}
503

    
504
static void qemu_init_sigbus(void)
505
{
506
    struct sigaction action;
507

    
508
    memset(&action, 0, sizeof(action));
509
    action.sa_flags = SA_SIGINFO;
510
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
511
    sigaction(SIGBUS, &action, NULL);
512

    
513
    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
514
}
515

    
516
/* Consume, without blocking, every SIG_IPI and SIGBUS pending for the
   calling thread.  A SIGBUS is passed to kvm_on_sigbus_vcpu() and
   re-raised if that returns non-zero.  Unexpected sigtimedwait() or
   sigpending() failures are fatal.  */
static void qemu_kvm_eat_signals(CPUArchState *env)
{
    struct timespec no_wait = { 0, 0 };
    siginfo_t info;
    sigset_t wanted;
    sigset_t pending;
    int r;

    sigemptyset(&wanted);
    sigaddset(&wanted, SIG_IPI);
    sigaddset(&wanted, SIGBUS);

    for (;;) {
        /* Zero timeout: poll for one signal.  */
        r = sigtimedwait(&wanted, &info, &no_wait);
        if (r == -1 && errno != EAGAIN && errno != EINTR) {
            perror("sigtimedwait");
            exit(1);
        }

        if (r == SIGBUS
            && kvm_on_sigbus_vcpu(env, info.si_code, info.si_addr)) {
            sigbus_reraise();
        }

        r = sigpending(&pending);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
        if (!sigismember(&pending, SIG_IPI)
            && !sigismember(&pending, SIGBUS)) {
            break;
        }
    }
}
552

    
553
#else /* !CONFIG_LINUX */
554

    
555
/* Non-Linux build: no SIGBUS setup is performed.  */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
558

    
559
/* Non-Linux build: no signals are consumed.  */
static void qemu_kvm_eat_signals(CPUArchState *env)
{
    /* intentionally empty */
}
562
#endif /* !CONFIG_LINUX */
563

    
564
#ifndef _WIN32
565
/* Do-nothing handler; installed for SIG_IPI by
   qemu_kvm_init_cpu_signals().  */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
568

    
569
/* Signal setup for a KVM vCPU thread: install a no-op SIG_IPI handler,
   then hand KVM the thread's current signal mask with SIG_IPI and SIGBUS
   removed.  Exits on kvm_set_signal_mask() failure.  */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    struct sigaction ipi_action;
    sigset_t mask;
    int r;

    memset(&ipi_action, 0, sizeof(ipi_action));
    ipi_action.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &ipi_action, NULL);

    /* A NULL new set only queries the current mask.  */
    pthread_sigmask(SIG_BLOCK, NULL, &mask);
    sigdelset(&mask, SIG_IPI);
    sigdelset(&mask, SIGBUS);

    r = kvm_set_signal_mask(env, &mask);
    if (r != 0) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}
588

    
589
static void qemu_tcg_init_cpu_signals(void)
590
{
591
    sigset_t set;
592
    struct sigaction sigact;
593

    
594
    memset(&sigact, 0, sizeof(sigact));
595
    sigact.sa_handler = cpu_signal;
596
    sigaction(SIG_IPI, &sigact, NULL);
597

    
598
    sigemptyset(&set);
599
    sigaddset(&set, SIG_IPI);
600
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
601
}
602

    
603
#else /* _WIN32 */
604
/* Win32 build: this path aborts unconditionally.  */
static void qemu_kvm_init_cpu_signals(CPUArchState *env)
{
    abort();
}
608

    
609
/* Win32 build: no signal setup is performed for the TCG thread.  */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
612
#endif /* _WIN32 */
613

    
614
QemuMutex qemu_global_mutex;
615
static QemuCond qemu_io_proceeded_cond;
616
static bool iothread_requesting_mutex;
617

    
618
static QemuThread io_thread;
619

    
620
static QemuThread *tcg_cpu_thread;
621
static QemuCond *tcg_halt_cond;
622

    
623
/* cpu creation */
624
static QemuCond qemu_cpu_cond;
625
/* system init */
626
static QemuCond qemu_pause_cond;
627
static QemuCond qemu_work_cond;
628

    
629
void qemu_init_cpu_loop(void)
630
{
631
    qemu_init_sigbus();
632
    qemu_cond_init(&qemu_cpu_cond);
633
    qemu_cond_init(&qemu_pause_cond);
634
    qemu_cond_init(&qemu_work_cond);
635
    qemu_cond_init(&qemu_io_proceeded_cond);
636
    qemu_mutex_init(&qemu_global_mutex);
637

    
638
    qemu_thread_get_self(&io_thread);
639
}
640

    
641
/* Run func(data) in the context of @env's thread and wait for it to
   finish.  When called from that thread the function runs immediately;
   otherwise a stack-allocated work item is appended to the CPU's queue,
   the CPU is kicked, and the caller waits on qemu_work_cond (releasing
   qemu_global_mutex while waiting) until the item is marked done.  */
void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item item;

    if (qemu_cpu_is_self(env)) {
        func(data);
        return;
    }

    item.func = func;
    item.data = data;
    item.next = NULL;
    item.done = false;

    /* Append to the tail of the CPU's singly linked work queue.  */
    if (env->queued_work_first) {
        env->queued_work_last->next = &item;
    } else {
        env->queued_work_first = &item;
    }
    env->queued_work_last = &item;

    qemu_cpu_kick(env);
    while (!item.done) {
        /* Preserve cpu_single_env across the wait.  */
        CPUArchState *saved_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = saved_env;
    }
}
669

    
670
/* Run and complete every work item queued on @env, then wake all
   run_on_cpu() waiters.  Returns without broadcasting when the queue is
   empty.  */
static void flush_queued_work(CPUArchState *env)
{
    struct qemu_work_item *item = env->queued_work_first;

    if (item == NULL) {
        return;
    }

    do {
        /* Unlink before running; the head is re-read after each item.  */
        env->queued_work_first = item->next;
        item->func(item->data);
        item->done = true;
        item = env->queued_work_first;
    } while (item != NULL);

    env->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
686

    
687
/* Housekeeping shared by all vCPU thread flavours after waking up: turn
   a pending stop request into the stopped state (signalling
   qemu_pause_cond), run queued work, and clear the kicked flag.  */
static void qemu_wait_io_event_common(CPUArchState *env)
{
    if (env->stop) {
        env->stopped = 1;
        env->stop = 0;
        qemu_cond_signal(&qemu_pause_cond);
    }

    flush_queued_work(env);
    env->thread_kicked = false;
}
697

    
698
static void qemu_tcg_wait_io_event(void)
699
{
700
    CPUArchState *env;
701

    
702
    while (all_cpu_threads_idle()) {
703
       /* Start accounting real time to the virtual clock if the CPUs
704
          are idle.  */
705
        qemu_clock_warp(vm_clock);
706
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
707
    }
708

    
709
    while (iothread_requesting_mutex) {
710
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
711
    }
712

    
713
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
714
        qemu_wait_io_event_common(env);
715
    }
716
}
717

    
718
/* KVM vCPU thread: sleep on the CPU's halt condition while it is idle,
   then consume pending signals and do the common wake-up housekeeping.  */
static void qemu_kvm_wait_io_event(CPUArchState *env)
{
    for (;;) {
        if (!cpu_thread_is_idle(env)) {
            break;
        }
        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(env);
    qemu_wait_io_event_common(env);
}
727

    
728
static void *qemu_kvm_cpu_thread_fn(void *arg)
729
{
730
    CPUArchState *env = arg;
731
    int r;
732

    
733
    qemu_mutex_lock(&qemu_global_mutex);
734
    qemu_thread_get_self(env->thread);
735
    env->thread_id = qemu_get_thread_id();
736
    cpu_single_env = env;
737

    
738
    r = kvm_init_vcpu(env);
739
    if (r < 0) {
740
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
741
        exit(1);
742
    }
743

    
744
    qemu_kvm_init_cpu_signals(env);
745

    
746
    /* signal CPU creation */
747
    env->created = 1;
748
    qemu_cond_signal(&qemu_cpu_cond);
749

    
750
    while (1) {
751
        if (cpu_can_run(env)) {
752
            r = kvm_cpu_exec(env);
753
            if (r == EXCP_DEBUG) {
754
                cpu_handle_guest_debug(env);
755
            }
756
        }
757
        qemu_kvm_wait_io_event(env);
758
    }
759

    
760
    return NULL;
761
}
762

    
763
static void *qemu_dummy_cpu_thread_fn(void *arg)
764
{
765
#ifdef _WIN32
766
    fprintf(stderr, "qtest is not supported under Windows\n");
767
    exit(1);
768
#else
769
    CPUArchState *env = arg;
770
    sigset_t waitset;
771
    int r;
772

    
773
    qemu_mutex_lock_iothread();
774
    qemu_thread_get_self(env->thread);
775
    env->thread_id = qemu_get_thread_id();
776

    
777
    sigemptyset(&waitset);
778
    sigaddset(&waitset, SIG_IPI);
779

    
780
    /* signal CPU creation */
781
    env->created = 1;
782
    qemu_cond_signal(&qemu_cpu_cond);
783

    
784
    cpu_single_env = env;
785
    while (1) {
786
        cpu_single_env = NULL;
787
        qemu_mutex_unlock_iothread();
788
        do {
789
            int sig;
790
            r = sigwait(&waitset, &sig);
791
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
792
        if (r == -1) {
793
            perror("sigwait");
794
            exit(1);
795
        }
796
        qemu_mutex_lock_iothread();
797
        cpu_single_env = env;
798
        qemu_wait_io_event_common(env);
799
    }
800

    
801
    return NULL;
802
#endif
803
}
804

    
805
static void tcg_exec_all(void);
806

    
807
static void *qemu_tcg_cpu_thread_fn(void *arg)
808
{
809
    CPUArchState *env = arg;
810

    
811
    qemu_tcg_init_cpu_signals();
812
    qemu_thread_get_self(env->thread);
813

    
814
    /* signal CPU creation */
815
    qemu_mutex_lock(&qemu_global_mutex);
816
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
817
        env->thread_id = qemu_get_thread_id();
818
        env->created = 1;
819
    }
820
    qemu_cond_signal(&qemu_cpu_cond);
821

    
822
    /* wait for initial kick-off after machine start */
823
    while (first_cpu->stopped) {
824
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
825

    
826
        /* process any pending work */
827
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
828
            qemu_wait_io_event_common(env);
829
        }
830
    }
831

    
832
    while (1) {
833
        tcg_exec_all();
834
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
835
            qemu_notify_event();
836
        }
837
        qemu_tcg_wait_io_event();
838
    }
839

    
840
    return NULL;
841
}
842

    
843
/* Interrupt the thread that runs @env.
   POSIX: send it SIG_IPI; a pthread_kill() failure is fatal.
   Win32: unless called from that very thread, suspend it, run
   cpu_signal() on its behalf and resume it.
 *
 * The fatal message is newline-terminated so that it is not glued to
 * whatever is printed on the terminal next.  */
static void qemu_cpu_kick_thread(CPUArchState *env)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(env->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(env)) {
        SuspendThread(env->hThread);
        cpu_signal(0);
        ResumeThread(env->hThread);
    }
#endif
}
861

    
862
void qemu_cpu_kick(void *_env)
863
{
864
    CPUArchState *env = _env;
865

    
866
    qemu_cond_broadcast(env->halt_cond);
867
    if (!tcg_enabled() && !env->thread_kicked) {
868
        qemu_cpu_kick_thread(env);
869
        env->thread_kicked = true;
870
    }
871
}
872

    
873
void qemu_cpu_kick_self(void)
874
{
875
#ifndef _WIN32
876
    assert(cpu_single_env);
877

    
878
    if (!cpu_single_env->thread_kicked) {
879
        qemu_cpu_kick_thread(cpu_single_env);
880
        cpu_single_env->thread_kicked = true;
881
    }
882
#else
883
    abort();
884
#endif
885
}
886

    
887
int qemu_cpu_is_self(void *_env)
888
{
889
    CPUArchState *env = _env;
890

    
891
    return qemu_thread_is_self(env->thread);
892
}
893

    
894
void qemu_mutex_lock_iothread(void)
895
{
896
    if (!tcg_enabled()) {
897
        qemu_mutex_lock(&qemu_global_mutex);
898
    } else {
899
        iothread_requesting_mutex = true;
900
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
901
            qemu_cpu_kick_thread(first_cpu);
902
            qemu_mutex_lock(&qemu_global_mutex);
903
        }
904
        iothread_requesting_mutex = false;
905
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
906
    }
907
}
908

    
909
void qemu_mutex_unlock_iothread(void)
910
{
911
    qemu_mutex_unlock(&qemu_global_mutex);
912
}
913

    
914
static int all_vcpus_paused(void)
915
{
916
    CPUArchState *penv = first_cpu;
917

    
918
    while (penv) {
919
        if (!penv->stopped) {
920
            return 0;
921
        }
922
        penv = penv->next_cpu;
923
    }
924

    
925
    return 1;
926
}
927

    
928
void pause_all_vcpus(void)
929
{
930
    CPUArchState *penv = first_cpu;
931

    
932
    qemu_clock_enable(vm_clock, false);
933
    while (penv) {
934
        penv->stop = 1;
935
        qemu_cpu_kick(penv);
936
        penv = penv->next_cpu;
937
    }
938

    
939
    if (!qemu_thread_is_self(&io_thread)) {
940
        cpu_stop_current();
941
        if (!kvm_enabled()) {
942
            while (penv) {
943
                penv->stop = 0;
944
                penv->stopped = 1;
945
                penv = penv->next_cpu;
946
            }
947
            return;
948
        }
949
    }
950

    
951
    while (!all_vcpus_paused()) {
952
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
953
        penv = first_cpu;
954
        while (penv) {
955
            qemu_cpu_kick(penv);
956
            penv = penv->next_cpu;
957
        }
958
    }
959
}
960

    
961
void resume_all_vcpus(void)
962
{
963
    CPUArchState *penv = first_cpu;
964

    
965
    qemu_clock_enable(vm_clock, true);
966
    while (penv) {
967
        penv->stop = 0;
968
        penv->stopped = 0;
969
        qemu_cpu_kick(penv);
970
        penv = penv->next_cpu;
971
    }
972
}
973

    
974
static void qemu_tcg_init_vcpu(void *_env)
975
{
976
    CPUArchState *env = _env;
977

    
978
    /* share a single thread for all cpus with TCG */
979
    if (!tcg_cpu_thread) {
980
        env->thread = g_malloc0(sizeof(QemuThread));
981
        env->halt_cond = g_malloc0(sizeof(QemuCond));
982
        qemu_cond_init(env->halt_cond);
983
        tcg_halt_cond = env->halt_cond;
984
        qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env,
985
                           QEMU_THREAD_JOINABLE);
986
#ifdef _WIN32
987
        env->hThread = qemu_thread_get_handle(env->thread);
988
#endif
989
        while (env->created == 0) {
990
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
991
        }
992
        tcg_cpu_thread = env->thread;
993
    } else {
994
        env->thread = tcg_cpu_thread;
995
        env->halt_cond = tcg_halt_cond;
996
    }
997
}
998

    
999
/* Allocate @env's thread object and halt condition, start its KVM vCPU
   thread and wait until that thread reports creation.  */
static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    env->thread = g_malloc0(sizeof(*env->thread));
    env->halt_cond = g_malloc0(sizeof(*env->halt_cond));
    qemu_cond_init(env->halt_cond);

    qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    while (!env->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1010

    
1011
/* Allocate @env's thread object and halt condition, start its dummy vCPU
   thread and wait until that thread reports creation.  */
static void qemu_dummy_start_vcpu(CPUArchState *env)
{
    env->thread = g_malloc0(sizeof(*env->thread));
    env->halt_cond = g_malloc0(sizeof(*env->halt_cond));
    qemu_cond_init(env->halt_cond);

    qemu_thread_create(env->thread, qemu_dummy_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    while (!env->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1022

    
1023
void qemu_init_vcpu(void *_env)
1024
{
1025
    CPUArchState *env = _env;
1026

    
1027
    env->nr_cores = smp_cores;
1028
    env->nr_threads = smp_threads;
1029
    env->stopped = 1;
1030
    if (kvm_enabled()) {
1031
        qemu_kvm_start_vcpu(env);
1032
    } else if (tcg_enabled()) {
1033
        qemu_tcg_init_vcpu(env);
1034
    } else {
1035
        qemu_dummy_start_vcpu(env);
1036
    }
1037
}
1038

    
1039
void cpu_stop_current(void)
1040
{
1041
    if (cpu_single_env) {
1042
        cpu_single_env->stop = 0;
1043
        cpu_single_env->stopped = 1;
1044
        cpu_exit(cpu_single_env);
1045
        qemu_cond_signal(&qemu_pause_cond);
1046
    }
1047
}
1048

    
1049
/*
 * Stop the VM and move it to run state @state.
 *
 * When called on io_thread the stop is performed directly through
 * do_vm_stop().  From any other thread the stop is only requested via
 * qemu_system_vmstop_request() and the current CPU is stopped with
 * cpu_stop_current().
 */
void vm_stop(RunState state)
{
    if (qemu_thread_is_self(&io_thread)) {
        do_vm_stop(state);
        return;
    }

    qemu_system_vmstop_request(state);
    /*
     * FIXME: should not return to device code in case
     * vm_stop() has been requested.
     */
    cpu_stop_current();
}
1062

    
1063
/* does a state transition even if the VM is already stopped,
1064
   current state is forgotten forever */
1065
void vm_stop_force_state(RunState state)
1066
{
1067
    if (runstate_is_running()) {
1068
        vm_stop(state);
1069
    } else {
1070
        runstate_set(state);
1071
    }
1072
}
1073

    
1074
/*
 * Run cpu_exec() once for @env, maintaining the instruction-count budget
 * when use_icount is set.
 *
 * Returns whatever cpu_exec() returned.  With CONFIG_PROFILER the time
 * spent around cpu_exec() is added to qemu_time.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    int exec_status;
#ifdef CONFIG_PROFILER
    int64_t started_at = profile_getclock();
#endif

    if (use_icount) {
        int64_t budget;
        int low_part;

        /* Take back whatever is still recorded on this CPU. */
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;

        /* New budget, derived from the vm_clock deadline. */
        budget = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += budget;

        /* The 16-bit counter takes at most 0xffff; the rest is "extra". */
        if (budget > 0xffff) {
            low_part = 0xffff;
        } else {
            low_part = budget;
        }
        budget -= low_part;
        env->icount_decr.u16.low = low_part;
        env->icount_extra = budget;
    }

    exec_status = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - started_at;
#endif

    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }

    return exec_status;
}
1111

    
1112
static void tcg_exec_all(void)
1113
{
1114
    int r;
1115

    
1116
    /* Account partial waits to the vm_clock.  */
1117
    qemu_clock_warp(vm_clock);
1118

    
1119
    if (next_cpu == NULL) {
1120
        next_cpu = first_cpu;
1121
    }
1122
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1123
        CPUArchState *env = next_cpu;
1124

    
1125
        qemu_clock_enable(vm_clock,
1126
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1127

    
1128
        if (cpu_can_run(env)) {
1129
            r = tcg_cpu_exec(env);
1130
            if (r == EXCP_DEBUG) {
1131
                cpu_handle_guest_debug(env);
1132
                break;
1133
            }
1134
        } else if (env->stop || env->stopped) {
1135
            break;
1136
        }
1137
    }
1138
    exit_request = 0;
1139
}
1140

    
1141
void set_numa_modes(void)
1142
{
1143
    CPUArchState *env;
1144
    int i;
1145

    
1146
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
1147
        for (i = 0; i < nb_numa_nodes; i++) {
1148
            if (node_cpumask[i] & (1 << env->cpu_index)) {
1149
                env->numa_node = i;
1150
            }
1151
        }
1152
    }
1153
}
1154

    
1155
void set_cpu_log(const char *optarg)
1156
{
1157
    int mask;
1158
    const CPULogItem *item;
1159

    
1160
    mask = cpu_str_to_log_mask(optarg);
1161
    if (!mask) {
1162
        printf("Log items (comma separated):\n");
1163
        for (item = cpu_log_items; item->mask != 0; item++) {
1164
            printf("%-10s %s\n", item->name, item->help);
1165
        }
1166
        exit(1);
1167
    }
1168
    cpu_set_log(mask);
1169
}
1170

    
1171
/*
 * Forward the log file name given in @optarg to cpu_set_log_filename().
 */
void set_cpu_log_filename(const char *optarg)
{
    cpu_set_log_filename(optarg);
}
1175

    
1176
/*
 * Print the list of CPU models supported by the target.
 *
 * @f:           stream handed to the target's listing routine
 * @cpu_fprintf: fprintf-like callback handed to the listing routine
 * @optarg:      passed on only to cpu_list_id()
 *
 * Uses cpu_list_id() when the target defines it, falls back to the
 * deprecated cpu_list(), and does nothing when the target defines neither.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list_id
    cpu_list_id(f, cpu_fprintf, optarg);
#elif defined(cpu_list)
    cpu_list(f, cpu_fprintf); /* deprecated */
#endif
}
1185

    
1186
/*
 * Build the list of per-CPU information entries.
 *
 * @errp: not written by this function.
 *
 * Walks the first_cpu list in order; for each CPU the state is
 * synchronised with cpu_synchronize_state() and a zero-allocated entry is
 * filled with the CPU index, halted flag, thread id and the
 * target-specific program counter field(s).  Returns the head of the list,
 * or NULL when there are no CPUs.  The entries are allocated with
 * g_malloc0().
 *
 * NOTE(review): "current" is set for the CPU that equals first_cpu, not
 * for any separately selected CPU -- confirm this is intended.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL;
    CpuInfoList **tail = &head;
    CPUArchState *env = first_cpu;

    while (env != NULL) {
        CpuInfoList *entry;

        cpu_synchronize_state(env);

        entry = g_malloc0(sizeof(*entry));
        entry->value = g_malloc0(sizeof(*entry->value));
        entry->value->CPU = env->cpu_index;
        entry->value->current = (env == first_cpu);
        entry->value->halted = env->halted;
        entry->value->thread_id = env->thread_id;
#if defined(TARGET_I386)
        entry->value->has_pc = true;
        entry->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        entry->value->has_nip = true;
        entry->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        entry->value->has_pc = true;
        entry->value->pc = env->pc;
        entry->value->has_npc = true;
        entry->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        entry->value->has_PC = true;
        entry->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        /* Append at the tail; entry->next is already NULL from g_malloc0. */
        *tail = entry;
        tail = &entry->next;

        env = env->next_cpu;
    }

    return head;
}
1229

    
1230
/*
 * Save @size bytes of memory starting at @addr to the file @filename.
 * The memory is read through cpu_memory_rw_debug() on the selected CPU.
 *
 * @addr:      start address handed to cpu_memory_rw_debug()
 * @size:      number of bytes to save; must not be negative
 * @filename:  output file, opened with mode "wb"
 * @has_cpu:   whether @cpu_index was supplied; CPU 0 is used otherwise
 * @cpu_index: index of the CPU whose view of memory is read
 * @errp:      receives QERR_INVALID_PARAMETER_VALUE for an unknown CPU or
 *             a negative size, QERR_OPEN_FILE_FAILED when the file cannot
 *             be opened, and QERR_IO_ERROR when writing or closing fails
 *
 * A negative @size is rejected up front: the chunk length is kept in a
 * uint32_t, so a negative value would otherwise turn into a huge length
 * and overrun the 1 KiB bounce buffer.  The result of fclose() is checked
 * because buffered data may only reach the file at that point.
 *
 * NOTE(review): any status returned by cpu_memory_rw_debug() is ignored
 * here -- confirm whether unreadable addresses should be reported.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    uint8_t buf[1024];
    bool io_failed = false;

    if (!has_cpu) {
        cpu_index = 0;
    }

    for (env = first_cpu; env; env = env->next_cpu) {
        if (cpu_index == env->cpu_index) {
            break;
        }
    }

    if (env == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    if (size < 0) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size",
                  "a non-negative number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
        return;
    }

    /* Copy in chunks of at most sizeof(buf) bytes. */
    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_memory_rw_debug(env, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            io_failed = true;
            break;
        }
        addr += l;
        size -= l;
    }

    /* Always close the file; a failed flush counts as an I/O error too. */
    if (fclose(f) != 0) {
        io_failed = true;
    }
    if (io_failed) {
        error_set(errp, QERR_IO_ERROR);
    }
}
1276

    
1277
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1278
                  Error **errp)
1279
{
1280
    FILE *f;
1281
    uint32_t l;
1282
    uint8_t buf[1024];
1283

    
1284
    f = fopen(filename, "wb");
1285
    if (!f) {
1286
        error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1287
        return;
1288
    }
1289

    
1290
    while (size != 0) {
1291
        l = sizeof(buf);
1292
        if (l > size)
1293
            l = size;
1294
        cpu_physical_memory_rw(addr, buf, l, 0);
1295
        if (fwrite(buf, 1, l, f) != l) {
1296
            error_set(errp, QERR_IO_ERROR);
1297
            goto exit;
1298
        }
1299
        addr += l;
1300
        size -= l;
1301
    }
1302

    
1303
exit:
1304
    fclose(f);
1305
}
1306

    
1307
/*
 * Inject an NMI into every CPU.
 *
 * On TARGET_I386 each CPU in the first_cpu list receives the NMI through
 * apic_deliver_nmi() when it has an apic_state, and through
 * cpu_interrupt(CPU_INTERRUPT_NMI) otherwise.  On every other target
 * @errp is set to QERR_UNSUPPORTED.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *cpu = first_cpu;

    while (cpu != NULL) {
        if (cpu->apic_state) {
            apic_deliver_nmi(cpu->apic_state);
        } else {
            cpu_interrupt(cpu, CPU_INTERRUPT_NMI);
        }
        cpu = cpu->next_cpu;
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}