target-ppc/kvm.c @ 997aba8e
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "sysemu/watchdog.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
/* XXX We have a race condition where we actually have a level-triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes the interrupt but ignores it, goes to sleep and never gets
 *     notified that there's still an interrupt pending.
 *
 *     As a quick workaround, wake up again 20 ms after injecting an
 *     interrupt. That way we can ensure that we always reinject interrupts
 *     the guest may have swallowed.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated later, by kvmppc_set_papr() */

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we may mislead users into thinking they can run
           BookE guests on Book3S. Let's hope nobody dares to try :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

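    /* KVM will read and write env->tlb.tlbm directly from now on, which is
     * why the asserts above check that QEMU's ppcmas_tlb_t entries have
     * exactly the layout the kernel expects for this shared array. */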
    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

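/* Superblock magic of hugetlbfs, as defined in the kernel's <linux/magic.h>;
 * statfs() reports it in f_type for hugetlbfs mounts. */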
#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

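    /* Copy each kernel-advertised segment page size (and each of its actual
     * page size encodings) into env->sps, skipping any that the backing
     * store can't provide and compacting the surviving entries. */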
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

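    /* Mark every TLB entry dirty so KVM_DIRTY_TLB makes the kernel reread
     * the entire shared array. */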
    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

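/* The width of a ONE_REG register is encoded in the id itself
 * (KVM_REG_SIZE_MASK), so these helpers pick the matching union member
 * when shuttling an SPR between env->spr[] and the kernel. */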
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
                spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
                spr, strerror(errno));
    }
}

static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

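        /* Each of VSX registers 0-31 overlays an FPR: doubleword 0 holds
         * the FPR value and doubleword 1 the VSX extension, which is the
         * layout the vsr[2] pair below hands to KVM_REG_PPC_VSR(). */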
        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

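    /* KVM transfers the SLB shadow and DTL registers as an (address, size)
     * pair read from a single address, so the size field must sit
     * immediately after the address field in CPUPPCState; the asserts
     * below verify that layout. */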
    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware: KVM packs the upper (BATU) and lower (BATL) halves
             * into one 64-bit value, so swap them into place here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                dprintf("Warning: Unable to set VPA information to KVM\n");
            }
        }
#endif /* TARGET_PPC64 */
    }

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                dprintf("Warning: Unable to get VPA information from KVM\n");
            }
        }
#endif
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        dprintf("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap encap = {};
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        fprintf(stderr, "warning: KVM does not support watchdog\n");
        return -1;
    }

    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

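    /* On PPC hosts /proc/cpuinfo carries a line such as
     * "timebase : 512000000"; parse the number after the colon. */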
    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit).  Returns -1 (all-ones) if the node or
 * property can't be found or opened, and 0 if the format isn't
 * understood */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
    strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
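    /* Flattened device tree properties are stored big-endian, so convert
     * to host byte order before returning. */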
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

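    /* 0x3860ffff encodes "li r3, -1" (addi r3, 0, -1); 0x60000000 is the
     * canonical PPC nop ("ori 0, 0, 0"). */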
    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(ppc_env_get_cpu(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

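    /* Presumably the intent of the formula below: clamp the RMA so the hash
     * table holds at least one 128-byte PTE group per mapped page, i.e.
     * 2^hash_shift bytes of HPT give 2^(hash_shift - 7) PTEGs, each
     * covering one page of 2^best_page_shift bytes. */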
    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
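    /* KVM spaces vcpu ids so that each core owns `smt` consecutive ids;
     * remap QEMU's densely packed index (smp_threads per core) onto that
     * numbering so threads land on the right host core. */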
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}