/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "sysemu/watchdog.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

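/* Host KVM capabilities, probed once from the kernel in kvm_arch_init()
 * and consulted by the rest of this file. */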
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated later, by kvmppc_set_papr() */

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

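/* Tell KVM which CPU to model by pushing the guest PVR through
 * KVM_SET_SREGS; a no-op on BookE, which sticks with the native PVR. */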
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users into thinking they
           can run BookE guests on Book S hosts. Let's hope nobody dares
           enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

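/* Page size backing guest RAM: the huge page size when -mem-path points
 * at a hugetlbfs mount, the normal host page size otherwise. */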
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugetlbfs; return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

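/* Filter the page sizes the kernel advertises down to those the backing
 * store can satisfy, and store the result in QEMU's CPU state. */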
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

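/* Read a single SPR from KVM through the ONE_REG interface into
 * env->spr[spr]; kvm_put_one_spr() below is the mirror image. */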
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
                spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
                spr, strerror(errno));
    }
}

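/* Push the FP/VSX and Altivec register files to KVM, one ONE_REG call
 * per register; with VSX, each FPR travels as the first doubleword of
 * the corresponding VSR. */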
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

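/* Mirror image of kvm_put_fp(): fetch FP/VSX and Altivec state from KVM. */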
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

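/* The VPA (Virtual Processor Area), SLB shadow buffer and dispatch trace
 * log are per-vCPU memory areas registered by PAPR guests; the next two
 * helpers transfer their registration state through ONE_REG. */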
#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    /* Pack the eight 4-bit CR fields into one 32-bit word: CR0 lands in
     * the most significant nibble (bits 28-31), CR7 in the least. */
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here: the
             * upper BAT register (DBAT[0], i.e. DBATnU) goes into the
             * high word. */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific registers,
         * there's a reasonable chance things will still work, at least
         * until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                dprintf("Warning: Unable to set VPA information to KVM\n");
            }
        }
#endif /* TARGET_PPC64 */
    }

    return ret;
}

int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    /* Unpack the CR word into the eight 4-bit CR fields (CR0 in the
     * most significant nibble) */
    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific registers,
         * there's a reasonable chance things will still work, at least
         * until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                dprintf("Warning: Unable to get VPA information from KVM\n");
            }
        }
#endif
    }

    return 0;
}

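/* Raise (level != 0) or clear the external interrupt line of a vCPU;
 * interrupt sources other than PPC_INTERRUPT_EXT are ignored here. */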
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyway, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        dprintf("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap encap = {};
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog\n");
        return -1;
    }

    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

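/* Copy the first /proc/cpuinfo line starting with @field into @value
 * (the whole "field : value" line). Returns 0 on success, -1 if the
 * field isn't present. */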
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

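/* Parse the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo, falling back to get_ticks_per_sec() if it's missing. */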
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit).  Returns -1 if the property can't be
 * found or opened, and 0 if it doesn't understand the format. */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
    strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        /* property is a 64-bit quantity */
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}

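/* Hardware threads per core on the host, as reported by KVM_CAP_PPC_SMT;
 * defaults to 1 when the capability is absent. */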
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
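/* Allocate a contiguous Real Mode Area from the kernel, map it at the
 * bottom of the system memory space and return its size; returns 0 when
 * no preallocated RMA is required, -1 on failure. */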
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif
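/* Have the kernel create an in-kernel TCE (DMA translation) table for
 * the given logical I/O bus number and mmap it into QEMU. Returns the
 * mapped table, storing the backing fd in *pfd for later teardown. */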
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

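/* (Re)allocate the guest hash page table. Returns the log2 size of the
 * table the kernel actually allocated, or 0 when the caller should
 * allocate the htab in userspace instead. */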
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

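/* Read the host's Processor Version Register. */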
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

/* Set or clear @flags in @word, depending on @on */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

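/* KVM numbers vCPUs in blocks of the host's threads per core, so guest
 * cpu indices must be spread out accordingly. E.g. with smp_threads == 2
 * on a host where kvmppc_smt_threads() == 4, vCPU index 3 becomes
 * (3 / 2) * 4 + (3 % 2) = 5. */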
int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}