Revision c0532a76

b/cpus.c
34 34

  
35 35
#include "cpus.h"
36 36
#include "compatfd.h"
37
#ifdef CONFIG_LINUX
38
#include <sys/prctl.h>
39
#endif
37 40

  
38 41
#ifdef SIGRTMIN
39 42
#define SIG_IPI (SIGRTMIN+4)
......
41 44
#define SIG_IPI SIGUSR1
42 45
#endif
43 46

  
47
#ifndef PR_MCE_KILL
48
#define PR_MCE_KILL 33
49
#endif
50

  
44 51
static CPUState *next_cpu;
45 52

  
46 53
/***********************************************************/
......
498 505
    }
499 506
}
500 507

  
508
static void sigbus_reraise(void)
509
{
510
    sigset_t set;
511
    struct sigaction action;
512

  
513
    memset(&action, 0, sizeof(action));
514
    action.sa_handler = SIG_DFL;
515
    if (!sigaction(SIGBUS, &action, NULL)) {
516
        raise(SIGBUS);
517
        sigemptyset(&set);
518
        sigaddset(&set, SIGBUS);
519
        sigprocmask(SIG_UNBLOCK, &set, NULL);
520
    }
521
    perror("Failed to re-raise SIGBUS!\n");
522
    abort();
523
}
524

  
525
/*
 * SIGBUS handler.
 *
 * On TARGET_I386 builds the signal code and faulting address taken from
 * @siginfo are first offered to kvm_on_sigbus(); the signal is re-raised
 * only when that call returns non-zero.  On every other target the signal
 * is always re-raised.
 *
 * @n and @ctx are not used.
 */
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    int unhandled = 1;

#if defined(TARGET_I386)
    unhandled = kvm_on_sigbus(siginfo->ssi_code,
                              (void *)(intptr_t)siginfo->ssi_addr);
#endif

    if (unhandled) {
        sigbus_reraise();
    }
}
533

  
501 534
/*
 * Consume pending SIG_IPI and SIGBUS signals on behalf of @env.
 *
 * @env:     CPU state handed to kvm_on_sigbus_vcpu() when a SIGBUS is taken.
 * @timeout: maximum wait per sigtimedwait() call, in milliseconds.
 *
 * qemu_global_mutex is released around each sigtimedwait() call and
 * re-acquired afterwards.  The loop repeats for as long as SIG_IPI or
 * SIGBUS is still reported pending; the same timeout is reused on every
 * iteration.  Any sigtimedwait() failure other than EAGAIN/EINTR, and any
 * sigpending() failure, terminates the process with exit(1).
 */
static void qemu_kvm_eat_signal(CPUState *env, int timeout)
{
    struct timespec ts;
    int r, e;
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;

    ts.tv_sec = timeout / 1000;
    ts.tv_nsec = (timeout % 1000) * 1000000;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        qemu_mutex_unlock(&qemu_global_mutex);

        r = sigtimedwait(&waitset, &siginfo, &ts);
        /* Save errno before the mutex call below can overwrite it. */
        e = errno;

        qemu_mutex_lock(&qemu_global_mutex);

        if (r == -1 && !(e == EAGAIN || e == EINTR)) {
            fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            /*
             * On TARGET_I386 the re-raise happens only when
             * kvm_on_sigbus_vcpu() returns non-zero; elsewhere it is
             * unconditional.
             */
#ifdef TARGET_I386
            if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
#endif
                sigbus_reraise();
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            /*
             * Report sigpending()'s own errno; 'e' still holds the value
             * saved after sigtimedwait() and would name the wrong error.
             */
            fprintf(stderr, "sigpending: %s\n", strerror(errno));
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
524 580

  
525 581
static void qemu_kvm_wait_io_event(CPUState *env)
......
640 696

  
641 697
    pthread_sigmask(SIG_BLOCK, NULL, &set);
642 698
    sigdelset(&set, SIG_IPI);
699
    sigdelset(&set, SIGBUS);
643 700
    r = kvm_set_signal_mask(env, &set);
644 701
    if (r) {
645 702
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
......
650 707
static sigset_t block_io_signals(void)
651 708
{
652 709
    sigset_t set;
710
    struct sigaction action;
653 711

  
654 712
    /* SIGUSR2 used by posix-aio-compat.c */
655 713
    sigemptyset(&set);
......
660 718
    sigaddset(&set, SIGIO);
661 719
    sigaddset(&set, SIGALRM);
662 720
    sigaddset(&set, SIG_IPI);
721
    sigaddset(&set, SIGBUS);
663 722
    pthread_sigmask(SIG_BLOCK, &set, NULL);
664 723

  
724
    memset(&action, 0, sizeof(action));
725
    action.sa_flags = SA_SIGINFO;
726
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
727
    sigaction(SIGBUS, &action, NULL);
728
    prctl(PR_MCE_KILL, 1, 1, 0, 0);
729

  
665 730
    return set;
666 731
}
667 732

  
b/kvm-stub.c
141 141
{
142 142
    return -ENOSYS;
143 143
}
144

  
145
/*
 * Stub implementation of kvm_on_sigbus().
 *
 * Both arguments are ignored and 1 is returned unconditionally.
 */
int kvm_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
b/kvm.h
110 110

  
111 111
void kvm_arch_reset_vcpu(CPUState *env);
112 112

  
113
int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr);
114
int kvm_on_sigbus(int code, void *addr);
115

  
113 116
struct kvm_guest_debug;
114 117
struct kvm_debug_exit_arch;
115 118

  
b/target-i386/cpu.h
250 250
#define PG_ERROR_RSVD_MASK 0x08
251 251
#define PG_ERROR_I_D_MASK  0x10
252 252

  
253
#define MCG_CTL_P	(1UL<<8)   /* MCG_CAP register available */
253
#define MCG_CTL_P	(1ULL<<8)   /* MCG_CAP register available */
254
#define MCG_SER_P	(1ULL<<24) /* MCA recovery/new status bits */
254 255

  
255
#define MCE_CAP_DEF	MCG_CTL_P
256
#define MCE_CAP_DEF	(MCG_CTL_P|MCG_SER_P)
256 257
#define MCE_BANKS_DEF	10
257 258

  
259
#define MCG_STATUS_RIPV	(1ULL<<0)   /* restart ip valid */
260
#define MCG_STATUS_EIPV	(1ULL<<1)   /* ip points to correct instruction */
258 261
#define MCG_STATUS_MCIP	(1ULL<<2)   /* machine check in progress */
259 262

  
260 263
#define MCI_STATUS_VAL	(1ULL<<63)  /* valid error */
261 264
#define MCI_STATUS_OVER	(1ULL<<62)  /* previous errors lost */
262 265
#define MCI_STATUS_UC	(1ULL<<61)  /* uncorrected error */
266
#define MCI_STATUS_EN	(1ULL<<60)  /* error enabled */
267
#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
268
#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
269
#define MCI_STATUS_PCC	(1ULL<<57)  /* processor context corrupt */
270
#define MCI_STATUS_S	(1ULL<<56)  /* Signaled machine check */
271
#define MCI_STATUS_AR	(1ULL<<55)  /* Action required */
272

  
273
/* MISC register defines */
274
#define MCM_ADDR_SEGOFF	0	/* segment offset */
275
#define MCM_ADDR_LINEAR	1	/* linear address */
276
#define MCM_ADDR_PHYS	2	/* physical address */
277
#define MCM_ADDR_MEM	3	/* memory address */
278
#define MCM_ADDR_GENERIC 7	/* generic */
263 279

  
264 280
#define MSR_IA32_TSC                    0x10
265 281
#define MSR_IA32_APICBASE               0x1b
b/target-i386/helper.c
1032 1032
        return;
1033 1033

  
1034 1034
    if (kvm_enabled()) {
1035
        kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
1035
        kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, 0);
1036 1036
        return;
1037 1037
    }
1038 1038

  
b/target-i386/kvm.c
46 46
#define MSR_KVM_WALL_CLOCK  0x11
47 47
#define MSR_KVM_SYSTEM_TIME 0x12
48 48

  
49
#ifndef BUS_MCEERR_AR
50
#define BUS_MCEERR_AR 4
51
#endif
52
#ifndef BUS_MCEERR_AO
53
#define BUS_MCEERR_AO 5
54
#endif
55

  
49 56
#ifdef KVM_CAP_EXT_CPUID
50 57

  
51 58
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
......
192 199
    return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
193 200
}
194 201

  
202
/*
 * Read @n MSRs for @env through the KVM_GET_MSRS vcpu ioctl.
 *
 * @msrs is copied into a temporary struct kvm_msrs, the ioctl is issued,
 * and the entries are copied back into @msrs regardless of the ioctl
 * result.
 *
 * Returns the value returned by kvm_vcpu_ioctl().
 */
static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n)
{
    struct kvm_msrs *request = qemu_malloc(sizeof *request + n * sizeof *msrs);
    int ret;

    request->nmsrs = n;
    memcpy(request->entries, msrs, n * sizeof *msrs);

    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, request);

    memcpy(msrs, request->entries, n * sizeof *msrs);
    /* NOTE(review): allocated with qemu_malloc() but released with free() --
     * confirm this pairing is intended. */
    free(request);

    return ret;
}
214

  
215
/* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */
216
/*
 * Report whether @env currently has a machine check in progress.
 *
 * Reads MSR_MCG_STATUS through kvm_get_msr() and tests MCG_STATUS_MCIP.
 *
 * Returns 1 if MCIP is set, 0 if it is clear, and -1 if the MSR could
 * not be read.
 */
static int kvm_mce_in_exception(CPUState *env)
{
    struct kvm_msr_entry msr_mcg_status = {
        .index = MSR_MCG_STATUS,
    };
    int r;

    r = kvm_get_msr(env, &msr_mcg_status, 1);
    /*
     * Treat every non-positive result as failure.  Other callers of the
     * vcpu ioctl wrapper in this file test for "r < 0", so checking only
     * for -1 would let other negative errors fall through and report the
     * zero-initialised .data as "no MCE in progress".
     */
    if (r <= 0) {
        return -1;
    }
    return !!(msr_mcg_status.data & MCG_STATUS_MCIP);
}
229

  
195 230
struct kvm_x86_mce_data
196 231
{
197 232
    CPUState *env;
198 233
    struct kvm_x86_mce *mce;
234
    int abort_on_error;
199 235
};
200 236

  
201 237
static void kvm_do_inject_x86_mce(void *_data)
......
203 239
    struct kvm_x86_mce_data *data = _data;
204 240
    int r;
205 241

  
242
    /* If there is an MCE exception being processed, ignore this SRAO MCE */
243
    r = kvm_mce_in_exception(data->env);
244
    if (r == -1)
245
        fprintf(stderr, "Failed to get MCE status\n");
246
    else if (r && !(data->mce->status & MCI_STATUS_AR))
247
        return;
248

  
206 249
    r = kvm_set_mce(data->env, data->mce);
207
    if (r < 0)
250
    if (r < 0) {
208 251
        perror("kvm_set_mce FAILED");
252
        if (data->abort_on_error) {
253
            abort();
254
        }
255
    }
209 256
}
210 257
#endif
211 258

  
212 259
void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
213
                        uint64_t mcg_status, uint64_t addr, uint64_t misc)
260
                        uint64_t mcg_status, uint64_t addr, uint64_t misc,
261
                        int abort_on_error)
214 262
{
215 263
#ifdef KVM_CAP_MCE
216 264
    struct kvm_x86_mce mce = {
......
225 273
            .mce = &mce,
226 274
    };
227 275

  
276
    if (!cenv->mcg_cap) {
277
        fprintf(stderr, "MCE support is not enabled!\n");
278
        return;
279
    }
280

  
228 281
    run_on_cpu(cenv, kvm_do_inject_x86_mce, &data);
282
#else
283
    if (abort_on_error)
284
        abort();
229 285
#endif
230 286
}
231 287

  
......
1528 1584
              ((env->segs[R_CS].selector  & 3) != 3);
1529 1585
}
1530 1586

  
1587
/*
 * Report an unrecoverable hardware memory error on stderr and terminate
 * the process with exit status 1.  Does not return.
 */
static void hardware_memory_error(void)
{
    fputs("Hardware memory error!\n", stderr);
    exit(1);
}
1592

  
1593
/*
 * Handle a SIGBUS taken on behalf of the vcpu @env.
 *
 * @code: si_code of the signal (BUS_MCEERR_AR / BUS_MCEERR_AO are the
 *        values given special treatment).
 * @addr: host virtual address reported with the signal.
 *
 * When KVM_CAP_MCE is available, the guest advertises MCG_SER_P, @addr is
 * non-NULL and @code is one of the two MCE codes, a machine check for
 * bank 9 is built and injected with kvm_set_mce():
 *   - BUS_MCEERR_AR: action-required status (MCI_STATUS_AR set, EIPV).
 *   - BUS_MCEERR_AO: skipped (returns 0) if an MCE is already in progress,
 *     otherwise injected with RIPV.
 * If @addr cannot be translated to a guest physical address, an AO error
 * is ignored (returns 0) and an AR error terminates the process.
 * A kvm_set_mce() failure aborts.
 *
 * In every other case: AO returns 0, AR terminates the process via
 * hardware_memory_error(), and any other code returns 1.
 *
 * Returns 0 when the signal was dealt with, 1 when it was not.
 */
int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
#if defined(KVM_CAP_MCE)
    struct kvm_x86_mce mce = {
            .bank = 9,
    };
    ram_addr_t ram_addr;
    target_phys_addr_t paddr;
    int r;

    if ((env->mcg_cap & MCG_SER_P) && addr
        && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
        if (code == BUS_MCEERR_AO) {
            /*
             * If there is an MCE exception being processed, ignore
             * this action-optional MCE.
             */
            r = kvm_mce_in_exception(env);
            if (r == -1) {
                fprintf(stderr, "Failed to get MCE status\n");
            } else if (r) {
                return 0;
            }
            /* Fake an Intel architectural Memory scrubbing UCR */
            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
                | 0xc0;
            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
        } else {
            /* Fake an Intel architectural Data Load SRAR UCR */
            mce.status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
                | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
                | MCI_STATUS_AR | 0x134;
            mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
            mce.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
        }

        /* Translate host virtual address -> ram offset -> guest physical. */
        if (qemu_ram_addr_from_host(addr, &ram_addr) ||
            !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) {
            fprintf(stderr, "Hardware memory error for memory used by "
                    "QEMU itself instead of guest system!\n");
            /* Hope we are lucky for AO MCE */
            if (code == BUS_MCEERR_AO) {
                return 0;
            }
            hardware_memory_error();
        }

        mce.addr = paddr;
        r = kvm_set_mce(env, &mce);
        if (r < 0) {
            /* NOTE(review): prints errno rather than the returned value;
             * confirm which one carries the error from kvm_set_mce(). */
            fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
            abort();
        }
    } else
#endif
    {
        switch (code) {
        case BUS_MCEERR_AO:
            return 0;
        case BUS_MCEERR_AR:
            hardware_memory_error();
            break;
        default:
            return 1;
        }
    }
    return 0;
}
1663

  
1664
int kvm_on_sigbus(int code, void *addr)
1665
{
1666
#if defined(KVM_CAP_MCE)
1667
    if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
1668
        uint64_t status;
1669
        void *vaddr;
1670
        ram_addr_t ram_addr;
1671
        target_phys_addr_t paddr;
1672
        CPUState *cenv;
1673

  
1674
        /* Hope we are lucky for AO MCE */
1675
        vaddr = addr;
1676
        if (qemu_ram_addr_from_host(vaddr, &ram_addr) ||
1677
            !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, &paddr)) {
1678
            fprintf(stderr, "Hardware memory error for memory used by "
1679
                    "QEMU itself instead of guest system!: %p\n", addr);
1680
            return 0;
1681
        }
1682
        status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN
1683
            | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S
1684
            | 0xc0;
1685
        kvm_inject_x86_mce(first_cpu, 9, status,
1686
                           MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
1687
                           (MCM_ADDR_PHYS << 6) | 0xc, 1);
1688
        for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu) {
1689
            kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
1690
                               MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
1691
        }
1692
    } else
1693
#endif
1694
    {
1695
        if (code == BUS_MCEERR_AO) {
1696
            return 0;
1697
        } else if (code == BUS_MCEERR_AR) {
1698
            hardware_memory_error();
1699
        } else {
1700
            return 1;
1701
        }
1702
    }
1703
    return 0;
1704
}
b/target-i386/kvm_x86.h
16 16
#define __KVM_X86_H__
17 17

  
18 18
void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
19
                        uint64_t mcg_status, uint64_t addr, uint64_t misc);
19
                        uint64_t mcg_status, uint64_t addr, uint64_t misc,
20
                        int abort_on_error);
20 21

  
21 22
#endif

Also available in: Unified diff