/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "sysemu/watchdog.h"
#include "trace.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes the interrupt but ignores it, goes to sleep and never gets
 *     notified that there's still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated later, by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

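/* Tell KVM the guest's PVR via KVM_SET_SREGS; BookE guests simply keep
 * the host PVR for now. */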
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we
           use the native PVR for now". This is the only sane way to
           check it though, so we potentially mislead users into
           thinking they can run BookE guests on BookS. Let's hope
           nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also
     *   don't support 1T segments.
     *
     *   This is safe because if HV KVM ever supports that capability,
     *   or PR KVM grows support for more page/segment sizes, those
     *   versions will have implemented KVM_CAP_PPC_GET_SMMU_INFO and
     *   thus we will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we
     *   assume P7 encodings for the SLB and hash table. Here too, we
     *   assume support for any newer processor will mean a kernel
     *   that implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't
     *   hit this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugetlbfs, so return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from the kernel if we haven't already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

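/* Read a single SPR from KVM via the ONE_REG interface into env->spr[] */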
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

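/* Write a single SPR from env->spr[] to KVM via the ONE_REG interface */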
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

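/* Push FPSCR, the FPRs/VSRs and the Altivec registers to KVM. With VSX,
 * each FPR is transferred together with its second doubleword as one
 * 128-bit VSR. */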
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

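/* Fetch FPSCR, the FPRs/VSRs and the Altivec registers from KVM;
 * the mirror of kvm_put_fp() */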
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
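/* Fetch the guest's VPA, SLB shadow and dispatch trace log registration
 * state from KVM */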
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA. */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

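/* Write the QEMU-side CPU state (GPRs, CR, MSR, sregs, SPRs, FP/Altivec,
 * VPA) back into KVM */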
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            }
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific registers,
         * there's a reasonable chance things will still work, at
         * least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }
#endif /* TARGET_PPC64 */
    }

    return ret;
}

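/* Read the CPU state out of KVM into the QEMU-side structures;
 * the inverse of kvm_arch_put_registers() */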
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (!env->external_htab) {
            ppc_store_sdr1(env, sregs.u.s.sdr1);
        }

        /* Sync SLB */
#ifdef TARGET_PPC64
        /*
         * The packed SLB array we get from KVM_GET_SREGS only contains
         * information about valid entries. So we flush our internal
         * copy to get rid of stale ones, then put all valid SLB entries
         * back in.
         */
        memset(env->slb, 0, sizeof(env->slb));
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
            /*
             * Only restore valid entries
             */
            if (rb & SLB_ESID_V) {
                ppc_store_slb(env, rb, rs);
            }
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific registers,
         * there's a reasonable chance things will still work, at
         * least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }
#endif
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyway, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}

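/* BookE watchdog plumbing: OR bits into TSR, clear TSR bits, and write
 * TCR, all via the ONE_REG interface */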
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap encap = {};
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog\n");
        return -1;
    }

    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

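/* Find the first line in /proc/cpuinfo that starts with 'field' and copy
 * the whole line into 'value'; returns 0 on success, -1 otherwise */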
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

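/* Return the host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo, defaulting to get_ticks_per_sec() if it can't be read */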
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with a clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit).  Returns -1 if the node or property can't
 * be found or opened, and 0 if the format isn't understood. */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
    strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

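/* Fetch KVM's paravirt info (hypercall instruction sequence and flags);
 * returns 0 on success, 1 on failure */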
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fall back to always-failing hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves; an HV KVM of this
     * era will have allocated a 16MB fixed-size hash table already.
     * Kernels of this era have the GET_PVINFO capability only on PR,
     * so we use this hack to determine the right answer. */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

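/* Read the host's Processor Version Register (PVR) */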
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

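/* Set or clear instruction-set flag bits in a CPU class flags word */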
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

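/* Class init for the "host" CPU: start from the nearest known CPU
 * class and override PVR, Altivec/VSX/DFP availability and cache sizes
 * with values queried from the host.
 */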
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

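/* KVM numbers vcpus with a stride of the host's SMT mode, so remap
 * QEMU's linear cpu index onto that numbering.
 */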
int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

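/* Register the TYPE_HOST_POWERPC_CPU type, derived from whichever CPU
 * class matches the host PVR (exactly, or failing that by mask).
 */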
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}

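/* Ask the kernel to handle the named RTAS call in-kernel under the
 * given token.  Returns -ENOENT if in-kernel RTAS is unsupported.
 */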
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

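/* Get a file descriptor for streaming the guest HPT out of (or, with
 * write set, back into) the kernel.
 */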
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}

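/* Stream HPT chunks from the kernel fd into the migration stream.
 * Returns 1 once the whole table has been read, 0 if the max_ns time
 * budget ran out first, or a negative value on read error.
 */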
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Kernel already returns data in BE format for the file */
            qemu_put_buffer(f, buf, rc);
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

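/* Push one chunk of HPT data from the migration stream back into the
 * kernel: a header describing (index, n_valid, n_invalid) followed by
 * the n_valid HPTEs themselves.
 */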
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    /* This is KVM on ppc, so this is all big-endian */
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

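/* The remaining kvm_arch hooks are stubs on ppc: emulation errors stop
 * the guest, SIGBUS events are left to the generic code, and guest
 * debug / breakpoints are not implemented.
 */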
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
}

void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
}

struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra slot for the read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};

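/* Read the HPTE group containing pte_index from the kernel.  Returns
 * a token (in fact a pointer into a freshly allocated kvm_get_htab_buf)
 * to be released with kvmppc_hash64_free_pteg(), or 0 on failure.
 */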
uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}

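/* Free the buffer behind a token from kvmppc_hash64_read_pteg() */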
void kvmppc_hash64_free_pteg(uint64_t token)
{
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
                            hpte);
    g_free(htab_buf);
}