Statistics
| Branch: | Revision:

root / target-ppc / kvm.c @ 6148b23d

History | View | Annotate | Download (24.3 kB)

1
/*
2
 * PowerPC implementation of KVM hooks
3
 *
4
 * Copyright IBM Corp. 2007
5
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6
 *
7
 * Authors:
8
 *  Jerone Young <jyoung5@us.ibm.com>
9
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10
 *  Hollis Blanchard <hollisb@us.ibm.com>
11
 *
12
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13
 * See the COPYING file in the top-level directory.
14
 *
15
 */
16

    
17
#include <dirent.h>
18
#include <sys/types.h>
19
#include <sys/ioctl.h>
20
#include <sys/mman.h>
21

    
22
#include <linux/kvm.h>
23

    
24
#include "qemu-common.h"
25
#include "qemu-timer.h"
26
#include "sysemu.h"
27
#include "kvm.h"
28
#include "kvm_ppc.h"
29
#include "cpu.h"
30
#include "device_tree.h"
31
#include "hw/sysbus.h"
32
#include "hw/spapr.h"
33

    
34
#include "hw/sysbus.h"
35
#include "hw/spapr.h"
36
#include "hw/spapr_vio.h"
37

    
38
//#define DEBUG_KVM

/* Debug logging helper: prints to stderr when DEBUG_KVM is defined
 * above, otherwise compiles to nothing. */
#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
47

    
48
/* Host device-tree directory containing one subdirectory per CPU node. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
49

    
50
/* No KVM capabilities are strictly required on PPC; optional ones are
 * probed individually in kvm_arch_init(). */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
53

    
54
/* Host KVM capabilities, probed once in kvm_arch_init(). */
static int cap_interrupt_unset = false; /* KVM_CAP_PPC_UNSET_IRQ */
static int cap_interrupt_level = false; /* KVM_CAP_PPC_IRQ_LEVEL */
static int cap_segstate;                /* KVM_CAP_PPC_SEGSTATE */
static int cap_booke_sregs;             /* KVM_CAP_PPC_BOOKE_SREGS */
static int cap_ppc_smt;                 /* KVM_CAP_PPC_SMT */
static int cap_ppc_rma;                 /* KVM_CAP_PPC_RMA */
static int cap_spapr_tce;               /* KVM_CAP_SPAPR_TCE */
61

    
62
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;   /* armed in kvm_arch_pre_run() */
72

    
73
/* idle_timer callback: kick the vcpu (@env) so a possibly-swallowed
 * level interrupt gets re-delivered. */
static void kvm_kick_env(void *env)
{
    qemu_cpu_kick(env);
}
77

    
78
/* Architecture-wide init: probe all the KVM capabilities this file
 * relies on.  Called once at VM creation; always returns 0 (missing
 * level-irq support only produces a warning, not an error). */
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    return 0;
}
95

    
96
/* Tell the kernel which PVR (processor version register) the guest CPU
 * has.  Skipped on BookE (see the comment below); on book3s it needs
 * KVM_CAP_PPC_SEGSTATE.  Returns 0 on success or a negative errno. */
static int kvm_arch_sync_sregs(CPUState *cenv)
{
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    /* Read-modify-write: only pvr is changed, every other sregs field
     * keeps the value the kernel just reported. */
    ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
}
122

    
123
/* Set up a shared TLB array with KVM (KVM_CAP_SW_TLB).
 *
 * Describes QEMU's BookE 2.06 MAS-format TLB geometry to the kernel and
 * points it directly at env->tlb.tlbm so both sides share one array.
 * Marks the TLB dirty so the next register sync pushes the initial
 * contents.  Returns 0 (also when KVM or the capability is absent) or
 * a negative errno from the enable-cap ioctl. */
static int kvm_booke206_tlb_init(CPUState *env)
{
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    /* The shared array must match QEMU's TLB exactly, entry for entry. */
    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
168

    
169
/* Per-vcpu init: push the PVR to the kernel, create the reinjection
 * workaround timer, and hook up the shared software TLB where the MMU
 * model supports it.  Returns 0 or a negative errno. */
int kvm_arch_init_vcpu(CPUState *cenv)
{
    int ret;

    ret = kvm_arch_sync_sregs(cenv);
    if (ret) {
        return ret;
    }

    /* NOTE(review): idle_timer is a single file-scope pointer; each vcpu
     * init overwrites it, so with SMP only the last vcpu's timer stays
     * reachable — confirm whether that is intended. */
    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cenv);
        break;
    default:
        break;
    }

    return ret;
}
191

    
192
/* No PPC-specific work is needed on vcpu reset (yet). */
void kvm_arch_reset_vcpu(CPUState *env)
{
}
195

    
196
/* Flush the complete software TLB contents back to the kernel by
 * marking every entry dirty (one bit per entry, all set) and issuing
 * KVM_DIRTY_TLB.  No-op when the shared TLB isn't in use. */
static void kvm_sw_tlb_put(CPUState *env)
{
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *dirty_bitmap;
    size_t bitmap_len;
    int rc;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap_len = (env->nb_tlb + 7) / 8;
    dirty_bitmap = g_malloc(bitmap_len);
    memset(dirty_bitmap, 0xFF, bitmap_len);

    dirty_tlb.bitmap = (uintptr_t)dirty_bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    rc = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
    if (rc) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-rc));
    }

    g_free(dirty_bitmap);
}
220

    
221
/* Push QEMU's vcpu state into the kernel.
 *
 * Reads the current kvm_regs first so fields QEMU doesn't track keep
 * their kernel-side values, overwrites the tracked ones, and writes the
 * structure back.  Also flushes a dirty shared software TLB.
 * Returns 0 on success or a negative errno from the ioctls.
 */
int kvm_arch_put_registers(CPUState *env, int level)
{
    struct kvm_regs regs;
    uint32_t cr = 0;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    /* Repack the eight 4-bit condition-register fields into regs.cr,
     * mirroring the unpacking in kvm_arch_get_registers().  Previously
     * regs.cr was never written here, so CR changes made on the QEMU
     * side (e.g. via the gdbstub) were silently dropped. */
    for (i = 0; i < 8; i++) {
        cr = (cr << 4) | (env->crf[i] & 15);
    }
    regs.cr = cr;

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    /* Push the shared software TLB only when QEMU actually touched it. */
    if (env->tlb_dirty) {
        kvm_sw_tlb_put(env);
        env->tlb_dirty = false;
    }

    return ret;
}
265

    
266
/* Pull the vcpu state out of the kernel into @env: GPRs, core special
 * registers, and — capability permitting — the BookE or book3s sregs.
 * Each BookE sregs group is guarded by its feature bit, so only state
 * the kernel actually reported is copied.  Returns 0 on success or a
 * negative errno from the ioctls. */
int kvm_arch_get_registers(CPUState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    /* Split the 32-bit CR into the eight 4-bit fields QEMU tracks;
     * crf[0] receives the most significant nibble. */
    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0;i < 32; i++)
        env->gpr[i] = regs.gpr[i];

    /* BookE special registers, grouped by advertised feature bits. */
    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            /* The 64-bit timebase is split into its TBL/TBU halves. */
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            /* mas7_3 packs MAS7 (high word) and MAS3 (low word). */
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        /* Freescale implementation-specific registers. */
        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    /* book3s state: SDR1, the SLB (64-bit only), segment registers
     * and BATs. */
    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs; each 64-bit kernel value holds the lower/upper
         * halves QEMU stores separately. */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}
442

    
443
/* Raise or lower a vcpu's external interrupt line through the kernel.
 * Only PPC_INTERRUPT_EXT is routed this way, and only when both the
 * set-level and unset irq capabilities exist; everything else is a
 * silent no-op.  Always returns 0. */
int kvmppc_set_interrupt(CPUState *env, int irq, int level)
{
    unsigned virq;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
    kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);

    return 0;
}
459

    
460
/* Select the external-interrupt input pin matching the target family. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
467

    
468
/* Called just before KVM_RUN: when the kernel lacks level-irq support,
 * inject a pending external interrupt by hand and arm idle_timer so a
 * possibly-swallowed level interrupt is retried ~20ms later (see the
 * race-condition comment near the top of this file). */
void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
{
    int r;
    unsigned irq;

    /* PowerPC Qemu tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
        if (r < 0)
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
500

    
501
/* Nothing PPC-specific to do after KVM_RUN returns. */
void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
{
}
504

    
505
/* No PPC-specific async events; just report whether the vcpu halted. */
int kvm_arch_process_async_events(CPUState *env)
{
    return env->halted;
}
509

    
510
/* KVM_EXIT_HLT handler: put the vcpu to sleep unless a hard interrupt
 * is already pending while external interrupts are enabled (MSR[EE]).
 * Always returns 0 (resume the run loop). */
static int kvmppc_handle_halt(CPUState *env)
{
    int hard_irq_pending = env->interrupt_request & CPU_INTERRUPT_HARD;

    if (msr_ee && !hard_irq_pending) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}
519

    
520
/* map dcr access to existing qemu dcr emulation */
521
static int kvmppc_handle_dcr_read(CPUState *env, uint32_t dcrn, uint32_t *data)
522
{
523
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
524
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
525

    
526
    return 0;
527
}
528

    
529
/* Map a DCR-write exit onto QEMU's existing DCR emulation; unhandled
 * registers only produce a warning.  Always returns 0. */
static int kvmppc_handle_dcr_write(CPUState *env, uint32_t dcrn, uint32_t data)
{
    int rc = ppc_dcr_write(env->dcr_env, dcrn, data);

    if (rc < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
536

    
537
/* Dispatch a KVM exit reason to its handler.
 * Returns <0 on error, 0 to keep the vcpu running, >0 (PAPR hcall) to
 * drop back to userspace. */
int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 1;
        break;
#endif
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
571

    
572
/* Search /proc/cpuinfo for a line starting with @field and copy that
 * line into @value (NUL-terminated, truncated to @len if necessary).
 *
 * Returns 0 when the field was found, -1 otherwise (including when
 * /proc/cpuinfo cannot be opened).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if(!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            /* strncpy() does not NUL-terminate on truncation; use
             * snprintf() so @value is always a valid C string. */
            snprintf(value, len, "%s", line);
            ret = 0;
            break;
        }
    } while(*line);

    fclose(f);

    return ret;
}
599

    
600
/* Host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo.  Falls back to get_ticks_per_sec() if the field is
 * missing or malformed.
 * NOTE(review): atoi() would truncate values above INT_MAX — presumably
 * fine for real timebase frequencies, but worth confirming. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    /* Skip to the value after the "timebase : " separator. */
    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
619

    
620
/* Try to find a device tree node for a CPU with clock-frequency property.
 * On success writes the node's path into @buf and returns 0; returns -1
 * (after a message on stdout) when the directory can't be opened or no
 * such node exists. */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        /* Probe for <node>/clock-frequency simply by trying to open it. */
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            /* Found one: leave the node path (without the property). */
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
652

    
653
/* Read a CPU node property from the host device tree that's a single
654
 * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
655
 * (can't find or open the property, or doesn't understand the
656
 * format) */
657
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
658
{
659
    char buf[PATH_MAX];
660
    union {
661
        uint32_t v32;
662
        uint64_t v64;
663
    } u;
664
    FILE *f;
665
    int len;
666

    
667
    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
668
        return -1;
669
    }
670

    
671
    strncat(buf, "/", sizeof(buf) - strlen(buf));
672
    strncat(buf, propname, sizeof(buf) - strlen(buf));
673

    
674
    f = fopen(buf, "rb");
675
    if (!f) {
676
        return -1;
677
    }
678

    
679
    len = fread(&u, 1, sizeof(u), f);
680
    fclose(f);
681
    switch (len) {
682
    case 4:
683
        /* property is a 32-bit quantity */
684
        return be32_to_cpu(u.v32);
685
    case 8:
686
        return be64_to_cpu(u.v64);
687
    }
688

    
689
    return 0;
690
}
691

    
692
/* Host CPU clock frequency from the device tree; see
 * kvmppc_read_int_cpu_dt() for the failure conventions. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
696

    
697
/* Host "ibm,vmx" device-tree property (Altivec/VSX level).
 * NOTE(review): the helper's 64-bit (uint64_t)-1 "not found" value is
 * truncated to UINT32_MAX here; the caller's "!= -1" check still
 * matches, but confirm no caller treats it as a real level. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
701

    
702
/* Host "ibm,dfp" device-tree property; same "not found" convention as
 * kvmppc_get_vmx(). */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
706

    
707
/* Fill @buf with the guest instruction sequence that performs a KVM
 * hypercall.  Prefers the sequence the kernel advertises through
 * KVM_PPC_GET_PVINFO; otherwise emits "li r3, -1; nop; nop; nop" so
 * hypercalls always fail.  Always returns 0.
 * NOTE(review): assumes buf_len >= 16 on the fallback path and
 * buf_len <= sizeof(pvinfo.hcall) on the PVINFO path — both depend on
 * the caller; verify. */
int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;

    struct kvm_ppc_pvinfo pvinfo;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}
736

    
737
/* Switch a vcpu into PAPR (pSeries) mode: enable KVM_CAP_PPC_PAPR,
 * program HIOR, and point SDR1 at the hashed page table.  Aborts the
 * whole VM if the host kernel lacks any required piece. */
void kvmppc_set_papr(CPUState *env)
{
    struct kvm_enable_cap cap = {};
    struct kvm_one_reg reg = {};
    struct kvm_sregs sregs = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

    if (ret) {
        goto fail;
    }

    /*
     * XXX We set HIOR here. It really should be a qdev property of
     *     the CPU node, but we don't have CPUs converted to qdev yet.
     *
     *     Once we have qdev CPUs, move HIOR to a qdev property and
     *     remove this chunk.
     */
    reg.id = KVM_ONE_REG_PPC_HIOR;
    reg.u.reg64 = env->spr[SPR_HIOR];
    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
    if (ret) {
        goto fail;
    }

    /* Set SDR1 so kernel space finds the HTAB */
    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    return;

fail:
    cpu_abort(env, "This KVM version does not support PAPR\n");
}
783

    
784
int kvmppc_smt_threads(void)
785
{
786
    return cap_ppc_smt ? cap_ppc_smt : 1;
787
}
788

    
789
/* Allocate and map a contiguous Real Mode Area for a pSeries guest and
 * register it as the low part of @sysmem.
 * Returns the mapped size, 0 when no contiguous RMA is required on this
 * host, or -1 on error.
 */
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    /* Cap the RMA at 256 MiB. */
    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        close(fd);  /* previously leaked on this error path */
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
831

    
832
/* Create an in-kernel TCE (DMA translation) table for a sPAPR VIO
 * device and map it into QEMU.  On success returns the mapped table and
 * stores its file descriptor in *pfd; returns NULL when the capability
 * is missing or any step fails. */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        return NULL;
    }

    /* One TCE entry per IOMMU page of the window. */
    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE) * sizeof(VIOsPAPR_RTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
863

    
864
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
865
{
866
    long len;
867

    
868
    if (fd < 0) {
869
        return -1;
870
    }
871

    
872
    len = (window_size / SPAPR_VIO_TCE_PAGE_SIZE)*sizeof(VIOsPAPR_RTCE);
873
    if ((munmap(table, len) < 0) ||
874
        (close(fd) < 0)) {
875
        fprintf(stderr, "KVM: Unexpected error removing KVM SPAPR TCE "
876
                "table: %s", strerror(errno));
877
        /* Leak the table */
878
    }
879

    
880
    return 0;
881
}
882

    
883
/* Read the host's processor version register via the mfpvr insn. */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
891

    
892
/* Set (@on true) or clear (@on false) the @flags bits in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t updated = *word;

    if (on) {
        updated |= flags;
    } else {
        updated &= ~flags;
    }

    *word = updated;
}
900

    
901
/* Build a CPU spec describing the host: start from the table entry
 * matching the host PVR, then adjust the Altivec/VSX/DFP instruction
 * flags using the host device tree.  The returned spec is freshly
 * allocated (never freed by this file).
 * NOTE(review): base_spec is dereferenced without a NULL check — verify
 * ppc_find_by_pvr() cannot fail for PVRs this code runs on. */
const ppc_def_t *kvmppc_host_cpu_def(void)
{
    uint32_t host_pvr = mfpvr();
    const ppc_def_t *base_spec;
    ppc_def_t *spec;
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    base_spec = ppc_find_by_pvr(host_pvr);

    spec = g_malloc0(sizeof(*spec));
    memcpy(spec, base_spec, sizeof(*spec));

    /* Now fix up the spec with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
    }

    return spec;
}
928

    
929
/* Always stop and report when KVM hits an emulation failure. */
bool kvm_arch_stop_on_emulation_error(CPUState *env)
{
    return true;
}
933

    
934
/* SIGBUS on a vcpu thread is not handled on PPC; returning 1 defers to
 * the generic code's default action. */
int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
    return 1;
}
938

    
939
/* Process-wide SIGBUS: likewise unhandled, defer to the default. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
}