Statistics
| Branch: | Revision:

root / hw / ppc / spapr.c @ fd506b4f

History | View | Annotate | Download (31.3 kB)

1
/*
2
 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
3
 *
4
 * Copyright (c) 2004-2007 Fabrice Bellard
5
 * Copyright (c) 2007 Jocelyn Mayer
6
 * Copyright (c) 2010 David Gibson, IBM Corporation.
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining a copy
9
 * of this software and associated documentation files (the "Software"), to deal
10
 * in the Software without restriction, including without limitation the rights
11
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
 * copies of the Software, and to permit persons to whom the Software is
13
 * furnished to do so, subject to the following conditions:
14
 *
15
 * The above copyright notice and this permission notice shall be included in
16
 * all copies or substantial portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
 * THE SOFTWARE.
25
 *
26
 */
27
#include "sysemu/sysemu.h"
28
#include "hw/hw.h"
29
#include "elf.h"
30
#include "net/net.h"
31
#include "sysemu/blockdev.h"
32
#include "sysemu/cpus.h"
33
#include "sysemu/kvm.h"
34
#include "kvm_ppc.h"
35

    
36
#include "hw/boards.h"
37
#include "hw/ppc/ppc.h"
38
#include "hw/loader.h"
39

    
40
#include "hw/ppc/spapr.h"
41
#include "hw/ppc/spapr_vio.h"
42
#include "hw/pci-host/spapr.h"
43
#include "hw/ppc/xics.h"
44
#include "hw/pci/msi.h"
45

    
46
#include "sysemu/kvm.h"
47
#include "kvm_ppc.h"
48
#include "hw/pci/pci.h"
49

    
50
#include "exec/address-spaces.h"
51
#include "hw/usb.h"
52
#include "qemu/config-file.h"
53

    
54
#include <libfdt.h>
55

    
56
/* SLOF memory layout:
57
 *
58
 * SLOF raw image loaded at 0, copies its romfs right below the flat
59
 * device-tree, then positions SLOF itself 31M below that
60
 *
61
 * So we set FW_OVERHEAD to 40MB which should account for all of that
62
 * and more
63
 *
64
 * We load our kernel at 4M, leaving space for SLOF initial image
65
 */
66
/* Size in bytes of every buffer used to build or unpack the device tree. */
#define FDT_MAX_SIZE            0x10000
67
/* NOTE(review): presumably the size limit for the RTAS blob; not used in the
 * part of the file visible here - confirm against the loader code. */
#define RTAS_MAX_SIZE           0x10000
68
/* Also serves as the kernel load address (see KERNEL_LOAD_ADDR). */
#define FW_MAX_SIZE             0x400000
69
/* NOTE(review): presumably the firmware image file name - confirm at its
 * point of use. */
#define FW_FILE_NAME            "slof.bin"
70
/* 40MB, per the SLOF memory layout comment above. */
#define FW_OVERHEAD             0x2800000
71
/* Guest address of the kernel: reserved in the device tree and advertised
 * through the "qemu,boot-kernel" property. */
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE
72

    
73
/* NOTE(review): unit is not visible from here (presumably MB) - confirm. */
#define MIN_RMA_SLOF            128UL
74

    
75
/* "timebase-frequency" reported to the guest when KVM is not enabled. */
#define TIMEBASE_FREQ           512000000ULL
76

    
77
/* NOTE(review): both limits are consumed outside the visible part of the
 * file - confirm their exact meaning there. */
#define MAX_CPUS                256
78
#define XICS_IRQS               1024
79

    
80
/* phandle given to the "interrupt-controller" node and handed to the PCI
 * device tree code. */
#define PHANDLE_XICP            0x00001111
81

    
82
/* 1 << htab_shift; used as both the size and the alignment of the hash page
 * table allocation. */
#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
83

    
84
/* Global machine state; allocated zero-filled in ppc_spapr_init(). */
sPAPREnvironment *spapr;
85

    
86
int spapr_allocate_irq(int hint, bool lsi)
87
{
88
    int irq;
89

    
90
    if (hint) {
91
        irq = hint;
92
        /* FIXME: we should probably check for collisions somehow */
93
    } else {
94
        irq = spapr->next_irq++;
95
    }
96

    
97
    /* Configure irq type */
98
    if (!xics_get_qirq(spapr->icp, irq)) {
99
        return 0;
100
    }
101

    
102
    xics_set_irq_type(spapr->icp, irq, lsi);
103

    
104
    return irq;
105
}
106

    
107
/*
 * Allocate a block of consecutive IRQs.
 *
 * num: how many interrupts to allocate.
 * lsi: forwarded to spapr_allocate_irq() for every interrupt.
 *
 * Returns the number of the first interrupt of the block, or -1 if any
 * single allocation fails (-1 is also returned when num is not positive,
 * because nothing gets allocated).
 */
int spapr_allocate_irq_block(int num, bool lsi)
{
    int base = -1;
    int n = 0;

    while (n < num) {
        int irq = spapr_allocate_irq(0, lsi);

        if (irq == 0) {
            return -1;
        }

        if (n == 0) {
            base = irq;
        }

        /* Anything other than a consecutive block is an internal bug */
        assert(irq == (base + n));

        ++n;
    }

    return base;
}
132

    
133
/*
 * Patch the per-CPU nodes of an already built device tree.
 *
 * Only CPUs whose cpu_index is a multiple of kvmppc_smt_threads() are
 * visited. Each visited "/cpus/<model>@<index>" node receives
 * "ibm,pft-size" and, when more than one NUMA node is configured,
 * "ibm,associativity".
 *
 * Returns a negative libfdt error as soon as a lookup or property update
 * fails; otherwise the result of the last property update (0 if no CPU
 * was visited).
 */
static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
{
    const int threads_per_core = kvmppc_smt_threads();
    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
    char node_path[32];
    CPUPPCState *env;
    int rc = 0;

    assert(spapr->cpu_model);

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CPUState *cpu = CPU(ppc_env_get_cpu(env));
        int node;

        /* Skip every thread that is not the first one of its group */
        if ((cpu->cpu_index % threads_per_core) != 0) {
            continue;
        }

        snprintf(node_path, sizeof(node_path), "/cpus/%s@%x",
                 spapr->cpu_model, cpu->cpu_index);

        node = fdt_path_offset(fdt, node_path);
        if (node < 0) {
            return node;
        }

        if (nb_numa_nodes > 1) {
            uint32_t associativity[] = {cpu_to_be32(0x5),
                                        cpu_to_be32(0x0),
                                        cpu_to_be32(0x0),
                                        cpu_to_be32(0x0),
                                        cpu_to_be32(cpu->numa_node),
                                        cpu_to_be32(cpu->cpu_index)};

            rc = fdt_setprop(fdt, node, "ibm,associativity", associativity,
                             sizeof(associativity));
            if (rc < 0) {
                return rc;
            }
        }

        rc = fdt_setprop(fdt, node, "ibm,pft-size",
                         pft_size_prop, sizeof(pft_size_prop));
        if (rc < 0) {
            return rc;
        }
    }

    return rc;
}
181

    
182

    
183
/*
 * Serialise the segment/page size table of @env into @prop.
 *
 * For every segment page size entry (the table ends at the first entry
 * whose page_shift is 0) the following big-endian cells are written:
 *   page_shift, slb_enc, count, then count pairs of (page_shift, pte_enc).
 *
 * env:     CPU state holding the sps table.
 * prop:    output buffer.
 * maxsize: capacity of @prop in bytes.
 *
 * Output stops early instead of overrunning @prop.
 *
 * Returns the number of bytes written to @prop.
 */
static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
                                     size_t maxsize)
{
    size_t maxcells = maxsize / sizeof(uint32_t);
    int i, j, count;
    uint32_t *p = prop;

    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
        size_t used, needed;

        if (!sps->page_shift) {
            break;
        }
        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
            if (sps->enc[count].page_shift == 0) {
                break;
            }
        }

        used = p - prop;
        needed = 3 + (size_t)count * 2;

        /* Test "needed > maxcells" first: the subtraction is unsigned, so a
         * buffer smaller than one entry would otherwise wrap around to a
         * huge limit and let the writes below overrun prop. The ">=" keeps
         * the original (one cell conservative) cut-off. */
        if (needed > maxcells || used >= maxcells - needed) {
            break;
        }

        *(p++) = cpu_to_be32(sps->page_shift);
        *(p++) = cpu_to_be32(sps->slb_enc);
        *(p++) = cpu_to_be32(count);
        for (j = 0; j < count; j++) {
            *(p++) = cpu_to_be32(sps->enc[j].page_shift);
            *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
        }
    }

    return (p - prop) * sizeof(uint32_t);
}
215

    
216
/*
 * Evaluate a libfdt expression and terminate QEMU if it fails.
 *
 * A negative result is reported on stderr together with the expression text
 * and fdt_strerror(), then the process exits with status 1.
 *
 * The temporary has a name of its own so that it never shadows a caller's
 * "ret" variable: with a temporary called "ret", an expression that mentions
 * the caller's "ret" would expand to the self-initialisation
 * "int ret = (ret)".
 */
#define _FDT(exp) \
    do { \
        int fdt_err_ = (exp);                                      \
        if (fdt_err_ < 0) {                                        \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(fdt_err_));                 \
            exit(1);                                               \
        }                                                          \
    } while (0)
225

    
226

    
227
static void *spapr_create_fdt_skel(const char *cpu_model,
228
                                   hwaddr initrd_base,
229
                                   hwaddr initrd_size,
230
                                   hwaddr kernel_size,
231
                                   const char *boot_device,
232
                                   const char *kernel_cmdline,
233
                                   uint32_t epow_irq)
234
{
235
    void *fdt;
236
    CPUPPCState *env;
237
    uint32_t start_prop = cpu_to_be32(initrd_base);
238
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
239
    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
240
        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
241
    char qemu_hypertas_prop[] = "hcall-memop1";
242
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
243
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
244
    char *modelname;
245
    int i, smt = kvmppc_smt_threads();
246
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
247

    
248
    fdt = g_malloc0(FDT_MAX_SIZE);
249
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
250

    
251
    if (kernel_size) {
252
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
253
    }
254
    if (initrd_size) {
255
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
256
    }
257
    _FDT((fdt_finish_reservemap(fdt)));
258

    
259
    /* Root node */
260
    _FDT((fdt_begin_node(fdt, "")));
261
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
262
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
263
    _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
264

    
265
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
266
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
267

    
268
    /* /chosen */
269
    _FDT((fdt_begin_node(fdt, "chosen")));
270

    
271
    /* Set Form1_affinity */
272
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));
273

    
274
    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
275
    _FDT((fdt_property(fdt, "linux,initrd-start",
276
                       &start_prop, sizeof(start_prop))));
277
    _FDT((fdt_property(fdt, "linux,initrd-end",
278
                       &end_prop, sizeof(end_prop))));
279
    if (kernel_size) {
280
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
281
                              cpu_to_be64(kernel_size) };
282

    
283
        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
284
    }
285
    if (boot_device) {
286
        _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
287
    }
288
    _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
289
    _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
290
    _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
291

    
292
    _FDT((fdt_end_node(fdt)));
293

    
294
    /* cpus */
295
    _FDT((fdt_begin_node(fdt, "cpus")));
296

    
297
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
298
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
299

    
300
    modelname = g_strdup(cpu_model);
301

    
302
    for (i = 0; i < strlen(modelname); i++) {
303
        modelname[i] = toupper(modelname[i]);
304
    }
305

    
306
    /* This is needed during FDT finalization */
307
    spapr->cpu_model = g_strdup(modelname);
308

    
309
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
310
        CPUState *cpu = CPU(ppc_env_get_cpu(env));
311
        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
312
        int index = cpu->cpu_index;
313
        uint32_t servers_prop[smp_threads];
314
        uint32_t gservers_prop[smp_threads * 2];
315
        char *nodename;
316
        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
317
                           0xffffffff, 0xffffffff};
318
        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
319
        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
320
        uint32_t page_sizes_prop[64];
321
        size_t page_sizes_prop_size;
322

    
323
        if ((index % smt) != 0) {
324
            continue;
325
        }
326

    
327
        nodename = g_strdup_printf("%s@%x", modelname, index);
328

    
329
        _FDT((fdt_begin_node(fdt, nodename)));
330

    
331
        g_free(nodename);
332

    
333
        _FDT((fdt_property_cell(fdt, "reg", index)));
334
        _FDT((fdt_property_string(fdt, "device_type", "cpu")));
335

    
336
        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
337
        _FDT((fdt_property_cell(fdt, "d-cache-block-size",
338
                                env->dcache_line_size)));
339
        _FDT((fdt_property_cell(fdt, "d-cache-line-size",
340
                                env->dcache_line_size)));
341
        _FDT((fdt_property_cell(fdt, "i-cache-block-size",
342
                                env->icache_line_size)));
343
        _FDT((fdt_property_cell(fdt, "i-cache-line-size",
344
                                env->icache_line_size)));
345

    
346
        if (pcc->l1_dcache_size) {
347
            _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
348
        } else {
349
            fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
350
        }
351
        if (pcc->l1_icache_size) {
352
            _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
353
        } else {
354
            fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
355
        }
356

    
357
        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
358
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
359
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
360
        _FDT((fdt_property_string(fdt, "status", "okay")));
361
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
362

    
363
        /* Build interrupt servers and gservers properties */
364
        for (i = 0; i < smp_threads; i++) {
365
            servers_prop[i] = cpu_to_be32(index + i);
366
            /* Hack, direct the group queues back to cpu 0 */
367
            gservers_prop[i*2] = cpu_to_be32(index + i);
368
            gservers_prop[i*2 + 1] = 0;
369
        }
370
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
371
                           servers_prop, sizeof(servers_prop))));
372
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
373
                           gservers_prop, sizeof(gservers_prop))));
374

    
375
        if (env->mmu_model & POWERPC_MMU_1TSEG) {
376
            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
377
                               segs, sizeof(segs))));
378
        }
379

    
380
        /* Advertise VMX/VSX (vector extensions) if available
381
         *   0 / no property == no vector extensions
382
         *   1               == VMX / Altivec available
383
         *   2               == VSX available */
384
        if (env->insns_flags & PPC_ALTIVEC) {
385
            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
386

    
387
            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
388
        }
389

    
390
        /* Advertise DFP (Decimal Floating Point) if available
391
         *   0 / no property == no DFP
392
         *   1               == DFP available */
393
        if (env->insns_flags2 & PPC2_DFP) {
394
            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
395
        }
396

    
397
        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
398
                                                      sizeof(page_sizes_prop));
399
        if (page_sizes_prop_size) {
400
            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
401
                               page_sizes_prop, page_sizes_prop_size)));
402
        }
403

    
404
        _FDT((fdt_end_node(fdt)));
405
    }
406

    
407
    g_free(modelname);
408

    
409
    _FDT((fdt_end_node(fdt)));
410

    
411
    /* RTAS */
412
    _FDT((fdt_begin_node(fdt, "rtas")));
413

    
414
    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
415
                       sizeof(hypertas_prop))));
416
    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
417
                       sizeof(qemu_hypertas_prop))));
418

    
419
    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
420
        refpoints, sizeof(refpoints))));
421

    
422
    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
423

    
424
    _FDT((fdt_end_node(fdt)));
425

    
426
    /* interrupt controller */
427
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));
428

    
429
    _FDT((fdt_property_string(fdt, "device_type",
430
                              "PowerPC-External-Interrupt-Presentation")));
431
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
432
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
433
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
434
                       interrupt_server_ranges_prop,
435
                       sizeof(interrupt_server_ranges_prop))));
436
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
437
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
438
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
439

    
440
    _FDT((fdt_end_node(fdt)));
441

    
442
    /* vdevice */
443
    _FDT((fdt_begin_node(fdt, "vdevice")));
444

    
445
    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
446
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
447
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
448
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
449
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
450
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
451

    
452
    _FDT((fdt_end_node(fdt)));
453

    
454
    /* event-sources */
455
    spapr_events_fdt_skel(fdt, epow_irq);
456

    
457
    _FDT((fdt_end_node(fdt))); /* close root node */
458
    _FDT((fdt_finish(fdt)));
459

    
460
    return fdt;
461
}
462

    
463
static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
464
{
465
    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
466
                                cpu_to_be32(0x0), cpu_to_be32(0x0),
467
                                cpu_to_be32(0x0)};
468
    char mem_name[32];
469
    hwaddr node0_size, mem_start;
470
    uint64_t mem_reg_property[2];
471
    int i, off;
472

    
473
    /* memory node(s) */
474
    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
475
    if (spapr->rma_size > node0_size) {
476
        spapr->rma_size = node0_size;
477
    }
478

    
479
    /* RMA */
480
    mem_reg_property[0] = 0;
481
    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
482
    off = fdt_add_subnode(fdt, 0, "memory@0");
483
    _FDT(off);
484
    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
485
    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
486
                      sizeof(mem_reg_property))));
487
    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
488
                      sizeof(associativity))));
489

    
490
    /* RAM: Node 0 */
491
    if (node0_size > spapr->rma_size) {
492
        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
493
        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
494

    
495
        sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
496
        off = fdt_add_subnode(fdt, 0, mem_name);
497
        _FDT(off);
498
        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
499
        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
500
                          sizeof(mem_reg_property))));
501
        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
502
                          sizeof(associativity))));
503
    }
504

    
505
    /* RAM: Node 1 and beyond */
506
    mem_start = node0_size;
507
    for (i = 1; i < nb_numa_nodes; i++) {
508
        mem_reg_property[0] = cpu_to_be64(mem_start);
509
        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
510
        associativity[3] = associativity[4] = cpu_to_be32(i);
511
        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
512
        off = fdt_add_subnode(fdt, 0, mem_name);
513
        _FDT(off);
514
        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
515
        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
516
                          sizeof(mem_reg_property))));
517
        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
518
                          sizeof(associativity))));
519
        mem_start += node_mem[i];
520
    }
521

    
522
    return 0;
523
}
524

    
525
static void spapr_finalize_fdt(sPAPREnvironment *spapr,
526
                               hwaddr fdt_addr,
527
                               hwaddr rtas_addr,
528
                               hwaddr rtas_size)
529
{
530
    int ret;
531
    void *fdt;
532
    sPAPRPHBState *phb;
533

    
534
    fdt = g_malloc(FDT_MAX_SIZE);
535

    
536
    /* open out the base tree into a temp buffer for the final tweaks */
537
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
538

    
539
    ret = spapr_populate_memory(spapr, fdt);
540
    if (ret < 0) {
541
        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
542
        exit(1);
543
    }
544

    
545
    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
546
    if (ret < 0) {
547
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
548
        exit(1);
549
    }
550

    
551
    QLIST_FOREACH(phb, &spapr->phbs, list) {
552
        ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
553
    }
554

    
555
    if (ret < 0) {
556
        fprintf(stderr, "couldn't setup PCI devices in fdt\n");
557
        exit(1);
558
    }
559

    
560
    /* RTAS */
561
    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
562
    if (ret < 0) {
563
        fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
564
    }
565

    
566
    /* Advertise NUMA via ibm,associativity */
567
    ret = spapr_fixup_cpu_dt(fdt, spapr);
568
    if (ret < 0) {
569
        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
570
    }
571

    
572
    if (!spapr->has_graphics) {
573
        spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
574
    }
575

    
576
    _FDT((fdt_pack(fdt)));
577

    
578
    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
579
        hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
580
                 fdt_totalsize(fdt), FDT_MAX_SIZE);
581
        exit(1);
582
    }
583

    
584
    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
585

    
586
    g_free(fdt);
587
}
588

    
589
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
590
{
591
    return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
592
}
593

    
594
/*
 * Hypercall entry point installed as cpu_ppc_hypercall.
 *
 * The result is stored in gpr[3]: H_PRIVILEGE when msr_pr is set, otherwise
 * whatever spapr_hypercall() returns for the opcode in gpr[3] with its
 * arguments starting at gpr[4].
 */
static void emulate_spapr_hypercall(PowerPCCPU *cpu)
{
    /* NOTE(review): the local is deliberately still called "env" -
     * msr_pr takes no argument and presumably expands to an expression on
     * "env"; confirm before renaming. */
    CPUPPCState *env = &cpu->env;

    if (msr_pr) {
        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
        env->gpr[3] = H_PRIVILEGE;
        return;
    }

    env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
}
605

    
606
/*
 * Reset the guest hash page table and, if requested, recompute the RMA.
 *
 * kvmppc_reset_htab() is asked first. A positive result is stored as the new
 * htab_shift and no table is touched here. Otherwise a table of
 * HTAB_SIZE(spapr) bytes is allocated on first use and zeroed.
 *
 * When spapr->vrma_adjust is set, spapr->rma_size is recomputed from the
 * RAM size and the (possibly updated) htab_shift.
 */
static void spapr_reset_htab(sPAPREnvironment *spapr)
{
    long kvm_shift = kvmppc_reset_htab(spapr->htab_shift);

    if (kvm_shift <= 0) {
        /* We own the table: allocate it once, aligned to its own size */
        if (spapr->htab == NULL) {
            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
        }

        /* And clear it */
        memset(spapr->htab, 0, HTAB_SIZE(spapr));
    } else {
        /* Kernel handles htab, we don't need to allocate one */
        spapr->htab_shift = kvm_shift;
    }

    /* Update the RMA size if necessary */
    if (spapr->vrma_adjust) {
        spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
    }
}
634

    
635
static void ppc_spapr_reset(void)
636
{
637
    CPUState *first_cpu_cpu;
638

    
639
    /* Reset the hash table & recalc the RMA */
640
    spapr_reset_htab(spapr);
641

    
642
    qemu_devices_reset();
643

    
644
    /* Load the fdt */
645
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
646
                       spapr->rtas_size);
647

    
648
    /* Set up the entry state */
649
    first_cpu_cpu = ENV_GET_CPU(first_cpu);
650
    first_cpu->gpr[3] = spapr->fdt_addr;
651
    first_cpu->gpr[5] = 0;
652
    first_cpu_cpu->halted = 0;
653
    first_cpu->nip = spapr->entry_point;
654

    
655
}
656

    
657
static void spapr_cpu_reset(void *opaque)
658
{
659
    PowerPCCPU *cpu = opaque;
660
    CPUState *cs = CPU(cpu);
661
    CPUPPCState *env = &cpu->env;
662

    
663
    cpu_reset(cs);
664

    
665
    /* All CPUs start halted.  CPU0 is unhalted from the machine level
666
     * reset code and the rest are explicitly started up by the guest
667
     * using an RTAS call */
668
    cs->halted = 1;
669

    
670
    env->spr[SPR_HIOR] = 0;
671

    
672
    env->external_htab = spapr->htab;
673
    env->htab_base = -1;
674
    env->htab_mask = HTAB_SIZE(spapr) - 1;
675
    env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
676
        (spapr->htab_shift - 18);
677
}
678

    
679
/*
 * Create the "spapr-nvram" device on the VIO bus and record it in
 * spapr->nvram.
 *
 * If the "machine" options carry an "nvram" entry, the block device of that
 * name becomes the device's "drive" property; QEMU exits with an error when
 * no such block device exists.
 */
static void spapr_create_nvram(sPAPREnvironment *spapr)
{
    DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
    QemuOpts *machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
    const char *drivename = NULL;

    if (machine_opts) {
        drivename = qemu_opt_get(machine_opts, "nvram");
    }

    if (drivename != NULL) {
        BlockDriverState *bs = bdrv_find(drivename);

        if (bs == NULL) {
            fprintf(stderr, "No such block device \"%s\" for nvram\n",
                    drivename);
            exit(1);
        }
        qdev_prop_set_drive_nofail(dev, "drive", bs);
    }

    qdev_init_nofail(dev);

    spapr->nvram = (struct sPAPRNVRAM *)dev;
}
708

    
709
/* Returns whether we want to use VGA or not */
710
static int spapr_vga_init(PCIBus *pci_bus)
711
{
712
    switch (vga_interface_type) {
713
    case VGA_NONE:
714
    case VGA_STD:
715
        return pci_vga_init(pci_bus) != NULL;
716
    default:
717
        fprintf(stderr, "This vga model is not supported,"
718
                "currently it only supports -vga std\n");
719
        exit(0);
720
        break;
721
    }
722
}
723

    
724
/* pSeries LPAR / sPAPR hardware init */
725
static void ppc_spapr_init(QEMUMachineInitArgs *args)
726
{
727
    ram_addr_t ram_size = args->ram_size;
728
    const char *cpu_model = args->cpu_model;
729
    const char *kernel_filename = args->kernel_filename;
730
    const char *kernel_cmdline = args->kernel_cmdline;
731
    const char *initrd_filename = args->initrd_filename;
732
    const char *boot_device = args->boot_device;
733
    PowerPCCPU *cpu;
734
    CPUPPCState *env;
735
    PCIHostState *phb;
736
    int i;
737
    MemoryRegion *sysmem = get_system_memory();
738
    MemoryRegion *ram = g_new(MemoryRegion, 1);
739
    hwaddr rma_alloc_size;
740
    uint32_t initrd_base = 0;
741
    long kernel_size = 0, initrd_size = 0;
742
    long load_limit, rtas_limit, fw_size;
743
    char *filename;
744

    
745
    msi_supported = true;
746

    
747
    spapr = g_malloc0(sizeof(*spapr));
748
    QLIST_INIT(&spapr->phbs);
749

    
750
    cpu_ppc_hypercall = emulate_spapr_hypercall;
751

    
752
    /* Allocate RMA if necessary */
753
    rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);
754

    
755
    if (rma_alloc_size == -1) {
756
        hw_error("qemu: Unable to create RMA\n");
757
        exit(1);
758
    }
759

    
760
    if (rma_alloc_size && (rma_alloc_size < ram_size)) {
761
        spapr->rma_size = rma_alloc_size;
762
    } else {
763
        spapr->rma_size = ram_size;
764

    
765
        /* With KVM, we don't actually know whether KVM supports an
766
         * unbounded RMA (PR KVM) or is limited by the hash table size
767
         * (HV KVM using VRMA), so we always assume the latter
768
         *
769
         * In that case, we also limit the initial allocations for RTAS
770
         * etc... to 256M since we have no way to know what the VRMA size
771
         * is going to be as it depends on the size of the hash table
772
         * isn't determined yet.
773
         */
774
        if (kvm_enabled()) {
775
            spapr->vrma_adjust = 1;
776
            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
777
        }
778
    }
779

    
780
    /* We place the device tree and RTAS just below either the top of the RMA,
781
     * or just below 2GB, whichever is lowere, so that it can be
782
     * processed with 32-bit real mode code if necessary */
783
    rtas_limit = MIN(spapr->rma_size, 0x80000000);
784
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
785
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
786
    load_limit = spapr->fdt_addr - FW_OVERHEAD;
787

    
788
    /* We aim for a hash table of size 1/128 the size of RAM.  The
789
     * normal rule of thumb is 1/64 the size of RAM, but that's much
790
     * more than needed for the Linux guests we support. */
791
    spapr->htab_shift = 18; /* Minimum architected size */
792
    while (spapr->htab_shift <= 46) {
793
        if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
794
            break;
795
        }
796
        spapr->htab_shift++;
797
    }
798

    
799
    /* Set up Interrupt Controller before we create the VCPUs */
800
    spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
801
                                  XICS_IRQS);
802
    spapr->next_irq = XICS_IRQ_BASE;
803

    
804
    /* init CPUs */
805
    if (cpu_model == NULL) {
806
        cpu_model = kvm_enabled() ? "host" : "POWER7";
807
    }
808
    for (i = 0; i < smp_cpus; i++) {
809
        cpu = cpu_ppc_init(cpu_model);
810
        if (cpu == NULL) {
811
            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
812
            exit(1);
813
        }
814
        env = &cpu->env;
815

    
816
        xics_cpu_setup(spapr->icp, cpu);
817

    
818
        /* Set time-base frequency to 512 MHz */
819
        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
820

    
821
        /* PAPR always has exception vectors in RAM not ROM. To ensure this,
822
         * MSR[IP] should never be set.
823
         */
824
        env->msr_mask &= ~(1 << 6);
825

    
826
        /* Tell KVM that we're in PAPR mode */
827
        if (kvm_enabled()) {
828
            kvmppc_set_papr(cpu);
829
        }
830

    
831
        qemu_register_reset(spapr_cpu_reset, cpu);
832
    }
833

    
834
    /* allocate RAM */
835
    spapr->ram_limit = ram_size;
836
    if (spapr->ram_limit > rma_alloc_size) {
837
        ram_addr_t nonrma_base = rma_alloc_size;
838
        ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;
839

    
840
        memory_region_init_ram(ram, "ppc_spapr.ram", nonrma_size);
841
        vmstate_register_ram_global(ram);
842
        memory_region_add_subregion(sysmem, nonrma_base, ram);
843
    }
844

    
845
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
846
    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
847
                                           rtas_limit - spapr->rtas_addr);
848
    if (spapr->rtas_size < 0) {
849
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
850
        exit(1);
851
    }
852
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
853
        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
854
                 spapr->rtas_size, RTAS_MAX_SIZE);
855
        exit(1);
856
    }
857
    g_free(filename);
858

    
859
    /* Set up EPOW events infrastructure */
860
    spapr_events_init(spapr);
861

    
862
    /* Set up IOMMU */
863
    spapr_iommu_init();
864

    
865
    /* Set up VIO bus */
866
    spapr->vio_bus = spapr_vio_bus_init();
867

    
868
    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
869
        if (serial_hds[i]) {
870
            spapr_vty_create(spapr->vio_bus, serial_hds[i]);
871
        }
872
    }
873

    
874
    /* We always have at least the nvram device on VIO */
875
    spapr_create_nvram(spapr);
876

    
877
    /* Set up PCI */
878
    spapr_pci_rtas_init();
879

    
880
    phb = spapr_create_phb(spapr, 0);
881

    
882
    for (i = 0; i < nb_nics; i++) {
883
        NICInfo *nd = &nd_table[i];
884

    
885
        if (!nd->model) {
886
            nd->model = g_strdup("ibmveth");
887
        }
888

    
889
        if (strcmp(nd->model, "ibmveth") == 0) {
890
            spapr_vlan_create(spapr->vio_bus, nd);
891
        } else {
892
            pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
893
        }
894
    }
895

    
896
    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
897
        spapr_vscsi_create(spapr->vio_bus);
898
    }
899

    
900
    /* Graphics */
901
    if (spapr_vga_init(phb->bus)) {
902
        spapr->has_graphics = true;
903
    }
904

    
905
    if (usb_enabled(spapr->has_graphics)) {
906
        pci_create_simple(phb->bus, -1, "pci-ohci");
907
        if (spapr->has_graphics) {
908
            usbdevice_create("keyboard");
909
            usbdevice_create("mouse");
910
        }
911
    }
912

    
913
    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
914
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
915
                "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
916
        exit(1);
917
    }
918

    
919
    if (kernel_filename) {
920
        uint64_t lowaddr = 0;
921

    
922
        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
923
                               NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
924
        if (kernel_size < 0) {
925
            kernel_size = load_image_targphys(kernel_filename,
926
                                              KERNEL_LOAD_ADDR,
927
                                              load_limit - KERNEL_LOAD_ADDR);
928
        }
929
        if (kernel_size < 0) {
930
            fprintf(stderr, "qemu: could not load kernel '%s'\n",
931
                    kernel_filename);
932
            exit(1);
933
        }
934

    
935
        /* load initrd */
936
        if (initrd_filename) {
937
            /* Try to locate the initrd in the gap between the kernel
938
             * and the firmware. Add a bit of space just in case
939
             */
940
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
941
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
942
                                              load_limit - initrd_base);
943
            if (initrd_size < 0) {
944
                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
945
                        initrd_filename);
946
                exit(1);
947
            }
948
        } else {
949
            initrd_base = 0;
950
            initrd_size = 0;
951
        }
952
    }
953

    
954
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
955
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
956
    if (fw_size < 0) {
957
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
958
        exit(1);
959
    }
960
    g_free(filename);
961

    
962
    spapr->entry_point = 0x100;
963

    
964
    /* Prepare the device tree */
965
    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
966
                                            initrd_base, initrd_size,
967
                                            kernel_size,
968
                                            boot_device, kernel_cmdline,
969
                                            spapr->epow_irq);
970
    assert(spapr->fdt_skel != NULL);
971
}
972

    
973
static QEMUMachine spapr_machine = {
974
    .name = "pseries",
975
    .desc = "pSeries Logical Partition (PAPR compliant)",
976
    .init = ppc_spapr_init,
977
    .reset = ppc_spapr_reset,
978
    .block_default_type = IF_SCSI,
979
    .max_cpus = MAX_CPUS,
980
    .no_parallel = 1,
981
    .boot_order = NULL,
982
};
983

    
984
static void spapr_machine_init(void)
985
{
986
    qemu_register_machine(&spapr_machine);
987
}
988

    
989
machine_init(spapr_machine_init);