Statistics
| Branch: | Revision:

root / hw / spapr.c @ f28359d8

History | View | Annotate | Download (27.5 kB)

1
/*
 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
 *
 * Copyright (c) 2004-2007 Fabrice Bellard
 * Copyright (c) 2007 Jocelyn Mayer
 * Copyright (c) 2010 David Gibson, IBM Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */
27
#include "sysemu.h"
28
#include "hw.h"
29
#include "elf.h"
30
#include "net.h"
31
#include "blockdev.h"
32
#include "cpus.h"
33
#include "kvm.h"
34
#include "kvm_ppc.h"
35

    
36
#include "hw/boards.h"
37
#include "hw/ppc.h"
38
#include "hw/loader.h"
39

    
40
#include "hw/spapr.h"
41
#include "hw/spapr_vio.h"
42
#include "hw/spapr_pci.h"
43
#include "hw/xics.h"
44

    
45
#include "kvm.h"
46
#include "kvm_ppc.h"
47
#include "pci.h"
48
#include "vga-pci.h"
49

    
50
#include "exec-memory.h"
51

    
52
#include <libfdt.h>
53

    
54
/* SLOF memory layout:
55
 *
56
 * SLOF raw image loaded at 0, copies its romfs right below the flat
57
 * device-tree, then position SLOF itself 31M below that
58
 *
59
 * So we set FW_OVERHEAD to 40MB which should account for all of that
60
 * and more
61
 *
62
 * We load our kernel at 4M, leaving space for SLOF initial image
63
 */
64
#define FDT_MAX_SIZE            0x10000
65
#define RTAS_MAX_SIZE           0x10000
66
#define FW_MAX_SIZE             0x400000
67
#define FW_FILE_NAME            "slof.bin"
68
#define FW_OVERHEAD             0x2800000
69
#define KERNEL_LOAD_ADDR        FW_MAX_SIZE
70

    
71
#define MIN_RMA_SLOF            128UL
72

    
73
#define TIMEBASE_FREQ           512000000ULL
74

    
75
#define MAX_CPUS                256
76
#define XICS_IRQS               1024
77

    
78
#define SPAPR_PCI_BUID          0x800000020000001ULL
79
#define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
80
#define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
81
#define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)
82

    
83
#define PHANDLE_XICP            0x00001111
84

    
85
sPAPREnvironment *spapr;
86
bool spapr_has_graphics;
87

    
88
qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num,
89
                            enum xics_irq_type type)
90
{
91
    uint32_t irq;
92
    qemu_irq qirq;
93

    
94
    if (hint) {
95
        irq = hint;
96
        /* FIXME: we should probably check for collisions somehow */
97
    } else {
98
        irq = spapr->next_irq++;
99
    }
100

    
101
    qirq = xics_assign_irq(spapr->icp, irq, type);
102
    if (!qirq) {
103
        return NULL;
104
    }
105

    
106
    if (irq_num) {
107
        *irq_num = irq;
108
    }
109

    
110
    return qirq;
111
}
112

    
113
/*
 * Add an "ibm,associativity" property to every CPU node of the device tree.
 *
 * Only CPUs whose index is a multiple of the SMT thread count are visited;
 * the node is looked up as /cpus/<spapr->cpu_model>@<cpu_index>.
 *
 * Returns the (non-negative) result of the last fdt_setprop() call, 0 if no
 * CPU was visited, or the negative libfdt error of the first failing call.
 */
static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
{
    const int threads_per_core = kvmppc_smt_threads();
    char node_path[32];
    CPUPPCState *cpu;
    int rc = 0;

    assert(spapr->cpu_model);

    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        uint32_t assoc[6];
        int node;

        if ((cpu->cpu_index % threads_per_core) != 0) {
            continue;
        }

        /* Cell 0 is the number of cells that follow */
        assoc[0] = cpu_to_be32(0x5);
        assoc[1] = cpu_to_be32(0x0);
        assoc[2] = cpu_to_be32(0x0);
        assoc[3] = cpu_to_be32(0x0);
        assoc[4] = cpu_to_be32(cpu->numa_node);
        assoc[5] = cpu_to_be32(cpu->cpu_index);

        snprintf(node_path, sizeof(node_path), "/cpus/%s@%x",
                 spapr->cpu_model, cpu->cpu_index);

        node = fdt_path_offset(fdt, node_path);
        if (node < 0) {
            return node;
        }

        rc = fdt_setprop(fdt, node, "ibm,associativity", assoc,
                         sizeof(assoc));
        if (rc < 0) {
            return rc;
        }
    }

    return rc;
}
150

    
151

    
152
/*
 * Serialise env->sps into the cell layout used for the
 * "ibm,segment-page-sizes" property.
 *
 * For each segment size entry with a non-zero page_shift the output holds:
 *   page_shift, slb_enc, <count>, then <count> pairs of
 *   (page_shift, pte_enc)
 * all as big-endian 32-bit cells.  Iteration stops at the first entry whose
 * page_shift is zero, or as soon as the next entry would not fit.
 *
 * env:     CPU whose segment/page size table is read
 * prop:    output buffer
 * maxsize: size of the output buffer in bytes
 *
 * Returns the number of bytes written to prop.
 */
static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
                                     size_t maxsize)
{
    size_t maxcells = maxsize / sizeof(uint32_t);
    int i, j, count;
    uint32_t *p = prop;

    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
        size_t used, needed;

        if (!sps->page_shift) {
            break;
        }
        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
            if (sps->enc[count].page_shift == 0) {
                break;
            }
        }

        /*
         * Three header cells plus two cells per encoding.  The check is
         * written as an addition so that a buffer smaller than one entry
         * cannot make an unsigned subtraction wrap around and let the
         * writes below run past the end of prop.
         */
        used = (size_t)(p - prop);
        needed = 3 + (size_t)count * 2;
        if (used + needed >= maxcells) {
            break;
        }

        *(p++) = cpu_to_be32(sps->page_shift);
        *(p++) = cpu_to_be32(sps->slb_enc);
        *(p++) = cpu_to_be32(count);
        for (j = 0; j < count; j++) {
            *(p++) = cpu_to_be32(sps->enc[j].page_shift);
            *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
        }
    }

    return (p - prop) * sizeof(uint32_t);
}
184

    
185
static void *spapr_create_fdt_skel(const char *cpu_model,
186
                                   target_phys_addr_t rma_size,
187
                                   target_phys_addr_t initrd_base,
188
                                   target_phys_addr_t initrd_size,
189
                                   target_phys_addr_t kernel_size,
190
                                   const char *boot_device,
191
                                   const char *kernel_cmdline,
192
                                   long hash_shift)
193
{
194
    void *fdt;
195
    CPUPPCState *env;
196
    uint64_t mem_reg_property[2];
197
    uint32_t start_prop = cpu_to_be32(initrd_base);
198
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
199
    uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
200
    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
201
        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
202
    char qemu_hypertas_prop[] = "hcall-memop1";
203
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
204
    int i;
205
    char *modelname;
206
    int smt = kvmppc_smt_threads();
207
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
208
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
209
    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
210
                                cpu_to_be32(0x0), cpu_to_be32(0x0),
211
                                cpu_to_be32(0x0)};
212
    char mem_name[32];
213
    target_phys_addr_t node0_size, mem_start;
214

    
215
#define _FDT(exp) \
216
    do { \
217
        int ret = (exp);                                           \
218
        if (ret < 0) {                                             \
219
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
220
                    #exp, fdt_strerror(ret));                      \
221
            exit(1);                                               \
222
        }                                                          \
223
    } while (0)
224

    
225
    fdt = g_malloc0(FDT_MAX_SIZE);
226
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
227

    
228
    if (kernel_size) {
229
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
230
    }
231
    if (initrd_size) {
232
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
233
    }
234
    _FDT((fdt_finish_reservemap(fdt)));
235

    
236
    /* Root node */
237
    _FDT((fdt_begin_node(fdt, "")));
238
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
239
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
240

    
241
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
242
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
243

    
244
    /* /chosen */
245
    _FDT((fdt_begin_node(fdt, "chosen")));
246

    
247
    /* Set Form1_affinity */
248
    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));
249

    
250
    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
251
    _FDT((fdt_property(fdt, "linux,initrd-start",
252
                       &start_prop, sizeof(start_prop))));
253
    _FDT((fdt_property(fdt, "linux,initrd-end",
254
                       &end_prop, sizeof(end_prop))));
255
    if (kernel_size) {
256
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
257
                              cpu_to_be64(kernel_size) };
258

    
259
        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
260
    }
261
    _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
262
    _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
263
    _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
264
    _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
265

    
266
    _FDT((fdt_end_node(fdt)));
267

    
268
    /* memory node(s) */
269
    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
270
    if (rma_size > node0_size) {
271
        rma_size = node0_size;
272
    }
273

    
274
    /* RMA */
275
    mem_reg_property[0] = 0;
276
    mem_reg_property[1] = cpu_to_be64(rma_size);
277
    _FDT((fdt_begin_node(fdt, "memory@0")));
278
    _FDT((fdt_property_string(fdt, "device_type", "memory")));
279
    _FDT((fdt_property(fdt, "reg", mem_reg_property,
280
        sizeof(mem_reg_property))));
281
    _FDT((fdt_property(fdt, "ibm,associativity", associativity,
282
        sizeof(associativity))));
283
    _FDT((fdt_end_node(fdt)));
284

    
285
    /* RAM: Node 0 */
286
    if (node0_size > rma_size) {
287
        mem_reg_property[0] = cpu_to_be64(rma_size);
288
        mem_reg_property[1] = cpu_to_be64(node0_size - rma_size);
289

    
290
        sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size);
291
        _FDT((fdt_begin_node(fdt, mem_name)));
292
        _FDT((fdt_property_string(fdt, "device_type", "memory")));
293
        _FDT((fdt_property(fdt, "reg", mem_reg_property,
294
                           sizeof(mem_reg_property))));
295
        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
296
                           sizeof(associativity))));
297
        _FDT((fdt_end_node(fdt)));
298
    }
299

    
300
    /* RAM: Node 1 and beyond */
301
    mem_start = node0_size;
302
    for (i = 1; i < nb_numa_nodes; i++) {
303
        mem_reg_property[0] = cpu_to_be64(mem_start);
304
        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
305
        associativity[3] = associativity[4] = cpu_to_be32(i);
306
        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
307
        _FDT((fdt_begin_node(fdt, mem_name)));
308
        _FDT((fdt_property_string(fdt, "device_type", "memory")));
309
        _FDT((fdt_property(fdt, "reg", mem_reg_property,
310
            sizeof(mem_reg_property))));
311
        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
312
            sizeof(associativity))));
313
        _FDT((fdt_end_node(fdt)));
314
        mem_start += node_mem[i];
315
    }
316

    
317
    /* cpus */
318
    _FDT((fdt_begin_node(fdt, "cpus")));
319

    
320
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
321
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
322

    
323
    modelname = g_strdup(cpu_model);
324

    
325
    for (i = 0; i < strlen(modelname); i++) {
326
        modelname[i] = toupper(modelname[i]);
327
    }
328

    
329
    /* This is needed during FDT finalization */
330
    spapr->cpu_model = g_strdup(modelname);
331

    
332
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
333
        int index = env->cpu_index;
334
        uint32_t servers_prop[smp_threads];
335
        uint32_t gservers_prop[smp_threads * 2];
336
        char *nodename;
337
        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
338
                           0xffffffff, 0xffffffff};
339
        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
340
        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
341
        uint32_t page_sizes_prop[64];
342
        size_t page_sizes_prop_size;
343

    
344
        if ((index % smt) != 0) {
345
            continue;
346
        }
347

    
348
        if (asprintf(&nodename, "%s@%x", modelname, index) < 0) {
349
            fprintf(stderr, "Allocation failure\n");
350
            exit(1);
351
        }
352

    
353
        _FDT((fdt_begin_node(fdt, nodename)));
354

    
355
        free(nodename);
356

    
357
        _FDT((fdt_property_cell(fdt, "reg", index)));
358
        _FDT((fdt_property_string(fdt, "device_type", "cpu")));
359

    
360
        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
361
        _FDT((fdt_property_cell(fdt, "dcache-block-size",
362
                                env->dcache_line_size)));
363
        _FDT((fdt_property_cell(fdt, "icache-block-size",
364
                                env->icache_line_size)));
365
        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
366
        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
367
        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
368
        _FDT((fdt_property(fdt, "ibm,pft-size",
369
                           pft_size_prop, sizeof(pft_size_prop))));
370
        _FDT((fdt_property_string(fdt, "status", "okay")));
371
        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
372

    
373
        /* Build interrupt servers and gservers properties */
374
        for (i = 0; i < smp_threads; i++) {
375
            servers_prop[i] = cpu_to_be32(index + i);
376
            /* Hack, direct the group queues back to cpu 0 */
377
            gservers_prop[i*2] = cpu_to_be32(index + i);
378
            gservers_prop[i*2 + 1] = 0;
379
        }
380
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s",
381
                           servers_prop, sizeof(servers_prop))));
382
        _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s",
383
                           gservers_prop, sizeof(gservers_prop))));
384

    
385
        if (env->mmu_model & POWERPC_MMU_1TSEG) {
386
            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
387
                               segs, sizeof(segs))));
388
        }
389

    
390
        /* Advertise VMX/VSX (vector extensions) if available
391
         *   0 / no property == no vector extensions
392
         *   1               == VMX / Altivec available
393
         *   2               == VSX available */
394
        if (env->insns_flags & PPC_ALTIVEC) {
395
            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
396

    
397
            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
398
        }
399

    
400
        /* Advertise DFP (Decimal Floating Point) if available
401
         *   0 / no property == no DFP
402
         *   1               == DFP available */
403
        if (env->insns_flags2 & PPC2_DFP) {
404
            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
405
        }
406

    
407
        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
408
                                                      sizeof(page_sizes_prop));
409
        if (page_sizes_prop_size) {
410
            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
411
                               page_sizes_prop, page_sizes_prop_size)));
412
        }
413

    
414
        _FDT((fdt_end_node(fdt)));
415
    }
416

    
417
    g_free(modelname);
418

    
419
    _FDT((fdt_end_node(fdt)));
420

    
421
    /* RTAS */
422
    _FDT((fdt_begin_node(fdt, "rtas")));
423

    
424
    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
425
                       sizeof(hypertas_prop))));
426
    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop,
427
                       sizeof(qemu_hypertas_prop))));
428

    
429
    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
430
        refpoints, sizeof(refpoints))));
431

    
432
    _FDT((fdt_end_node(fdt)));
433

    
434
    /* interrupt controller */
435
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));
436

    
437
    _FDT((fdt_property_string(fdt, "device_type",
438
                              "PowerPC-External-Interrupt-Presentation")));
439
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
440
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
441
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
442
                       interrupt_server_ranges_prop,
443
                       sizeof(interrupt_server_ranges_prop))));
444
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
445
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
446
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
447

    
448
    _FDT((fdt_end_node(fdt)));
449

    
450
    /* vdevice */
451
    _FDT((fdt_begin_node(fdt, "vdevice")));
452

    
453
    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
454
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
455
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
456
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
457
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
458
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
459

    
460
    _FDT((fdt_end_node(fdt)));
461

    
462
    _FDT((fdt_end_node(fdt))); /* close root node */
463
    _FDT((fdt_finish(fdt)));
464

    
465
    return fdt;
466
}
467

    
468
static void spapr_finalize_fdt(sPAPREnvironment *spapr,
469
                               target_phys_addr_t fdt_addr,
470
                               target_phys_addr_t rtas_addr,
471
                               target_phys_addr_t rtas_size)
472
{
473
    int ret;
474
    void *fdt;
475
    sPAPRPHBState *phb;
476

    
477
    fdt = g_malloc(FDT_MAX_SIZE);
478

    
479
    /* open out the base tree into a temp buffer for the final tweaks */
480
    _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
481

    
482
    ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
483
    if (ret < 0) {
484
        fprintf(stderr, "couldn't setup vio devices in fdt\n");
485
        exit(1);
486
    }
487

    
488
    QLIST_FOREACH(phb, &spapr->phbs, list) {
489
        ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
490
    }
491

    
492
    if (ret < 0) {
493
        fprintf(stderr, "couldn't setup PCI devices in fdt\n");
494
        exit(1);
495
    }
496

    
497
    /* RTAS */
498
    ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
499
    if (ret < 0) {
500
        fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
501
    }
502

    
503
    /* Advertise NUMA via ibm,associativity */
504
    if (nb_numa_nodes > 1) {
505
        ret = spapr_set_associativity(fdt, spapr);
506
        if (ret < 0) {
507
            fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
508
        }
509
    }
510

    
511
    if (!spapr_has_graphics) {
512
        spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
513
    }
514

    
515
    _FDT((fdt_pack(fdt)));
516

    
517
    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
518
        hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
519
                 fdt_totalsize(fdt), FDT_MAX_SIZE);
520
        exit(1);
521
    }
522

    
523
    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
524

    
525
    g_free(fdt);
526
}
527

    
528
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
529
{
530
    return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
531
}
532

    
533
/*
 * Hypercall hook installed in cpu_ppc_hypercall by ppc_spapr_init().
 *
 * r3 is passed to spapr_hypercall() as its second argument, together with
 * a pointer to the registers starting at r4; the value returned is stored
 * back into r3.
 */
static void emulate_spapr_hypercall(CPUPPCState *env)
{
    env->gpr[3] = spapr_hypercall(env,
                                  env->gpr[3],
                                  &env->gpr[4]);
}
537

    
538
static void spapr_reset(void *opaque)
539
{
540
    sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
541

    
542
    fprintf(stderr, "sPAPR reset\n");
543

    
544
    /* flush out the hash table */
545
    memset(spapr->htab, 0, spapr->htab_size);
546

    
547
    /* Load the fdt */
548
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
549
                       spapr->rtas_size);
550

    
551
    /* Set up the entry state */
552
    first_cpu->gpr[3] = spapr->fdt_addr;
553
    first_cpu->gpr[5] = 0;
554
    first_cpu->halted = 0;
555
    first_cpu->nip = spapr->entry_point;
556

    
557
}
558

    
559
static void spapr_cpu_reset(void *opaque)
560
{
561
    PowerPCCPU *cpu = opaque;
562

    
563
    cpu_reset(CPU(cpu));
564
}
565

    
566
/*
 * Create the VGA device on pci_bus when the standard VGA model is enabled.
 *
 * Returns 1 if a VGA device was created, 0 (after printing a message on
 * stderr) otherwise.
 */
static int spapr_vga_init(PCIBus *pci_bus)
{
    if (!std_vga_enabled) {
        /* The two literals are concatenated, so the space must be explicit */
        fprintf(stderr, "This vga model is not supported, "
                "currently it only supports -vga std\n");
        return 0;
    }

    pci_vga_init(pci_bus);
    return 1;
}
577

    
578
/* pSeries LPAR / sPAPR hardware init */
579
/*
 * Machine init callback for the "pseries" machine.
 *
 * Allocates the global machine state, then in order: the RMA, the CPUs,
 * the remaining RAM, the hash page table, the RTAS blob, the interrupt
 * controller, the IOMMU, the VIO bus with its serial/network/SCSI devices,
 * the PCI host bridge and VGA, the optional kernel and initrd, and the
 * SLOF firmware.  Finally the skeleton device tree is built and the machine
 * reset handler is registered.
 *
 * ram_size:        amount of guest RAM
 * boot_device:     forwarded to the device tree
 * kernel_filename: kernel image to load, or NULL
 * kernel_cmdline:  forwarded to the device tree
 * initrd_filename: initrd to load (only looked at with a kernel), or NULL
 * cpu_model:       CPU model name; NULL selects "host" under KVM and
 *                  "POWER7" otherwise
 *
 * Every failure is fatal.
 */
static void ppc_spapr_init(ram_addr_t ram_size,
                           const char *boot_device,
                           const char *kernel_filename,
                           const char *kernel_cmdline,
                           const char *initrd_filename,
                           const char *cpu_model)
{
    PowerPCCPU *cpu;
    CPUPPCState *env;
    int i;
    MemoryRegion *sysmem = get_system_memory();
    target_phys_addr_t rma_alloc_size, rma_size;
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
    long load_limit, rtas_limit, fw_size;
    long pteg_shift = 17;
    char *filename;

    spapr = g_malloc0(sizeof(*spapr));
    QLIST_INIT(&spapr->phbs);

    cpu_ppc_hypercall = emulate_spapr_hypercall;

    /* Allocate RMA if necessary */
    rma_alloc_size = kvmppc_alloc_rma("ppc_spapr.rma", sysmem);

    if (rma_alloc_size == -1) {
        hw_error("qemu: Unable to create RMA\n");
        exit(1);
    }
    if (rma_alloc_size && (rma_alloc_size < ram_size)) {
        rma_size = rma_alloc_size;
    } else {
        rma_size = ram_size;
    }

    /* We place the device tree and RTAS just below either the top of the RMA,
     * or just below 2GB, whichever is lower, so that it can be
     * processed with 32-bit real mode code if necessary */
    rtas_limit = MIN(rma_size, 0x80000000);
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
    load_limit = spapr->fdt_addr - FW_OVERHEAD;

    /* init CPUs */
    if (cpu_model == NULL) {
        cpu_model = kvm_enabled() ? "host" : "POWER7";
    }
    for (i = 0; i < smp_cpus; i++) {
        cpu = cpu_ppc_init(cpu_model);
        if (cpu == NULL) {
            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
            exit(1);
        }
        env = &cpu->env;

        /* Set time-base frequency to 512 MHz */
        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
        qemu_register_reset(spapr_cpu_reset, cpu);

        env->hreset_vector = 0x60;
        env->hreset_excp_prefix = 0;
        env->gpr[3] = env->cpu_index;
    }

    /* allocate RAM */
    spapr->ram_limit = ram_size;
    if (spapr->ram_limit > rma_alloc_size) {
        /* Only allocate the region when there is RAM beyond the RMA */
        MemoryRegion *ram = g_new(MemoryRegion, 1);
        ram_addr_t nonrma_base = rma_alloc_size;
        ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size;

        memory_region_init_ram(ram, "ppc_spapr.ram", nonrma_size);
        vmstate_register_ram_global(ram);
        memory_region_add_subregion(sysmem, nonrma_base, ram);
    }

    /* allocate hash page table.  For now we always make this 16mb,
     * later we should probably make it scale to the size of guest
     * RAM */
    spapr->htab_size = 1ULL << (pteg_shift + 7);
    spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        env->external_htab = spapr->htab;
        env->htab_base = -1;
        env->htab_mask = spapr->htab_size - 1;

        /* Tell KVM that we're in PAPR mode */
        env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
                             ((pteg_shift + 7) - 18);
        env->spr[SPR_HIOR] = 0;

        if (kvm_enabled()) {
            kvmppc_set_papr(env);
        }
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
    if (filename == NULL) {
        hw_error("qemu: could not find LPAR rtas '%s'\n", "spapr-rtas.bin");
        exit(1);
    }
    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
                                           rtas_limit - spapr->rtas_addr);
    if (spapr->rtas_size < 0) {
        hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
        exit(1);
    }
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
                 spapr->rtas_size, RTAS_MAX_SIZE);
        exit(1);
    }
    g_free(filename);

    /* Set up Interrupt Controller */
    spapr->icp = xics_system_init(XICS_IRQS);
    spapr->next_irq = 16;

    /* Set up IOMMU */
    spapr_iommu_init();

    /* Set up VIO bus */
    spapr->vio_bus = spapr_vio_bus_init();

    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
        if (serial_hds[i]) {
            spapr_vty_create(spapr->vio_bus, serial_hds[i]);
        }
    }

    /* Set up PCI */
    spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
                     SPAPR_PCI_MEM_WIN_ADDR,
                     SPAPR_PCI_MEM_WIN_SIZE,
                     SPAPR_PCI_IO_WIN_ADDR);

    for (i = 0; i < nb_nics; i++) {
        NICInfo *nd = &nd_table[i];

        if (!nd->model) {
            nd->model = g_strdup("ibmveth");
        }

        if (strcmp(nd->model, "ibmveth") == 0) {
            spapr_vlan_create(spapr->vio_bus, nd);
        } else {
            pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
        }
    }

    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
        spapr_vscsi_create(spapr->vio_bus);
    }

    /* Graphics */
    if (spapr_vga_init(QLIST_FIRST(&spapr->phbs)->host_state.bus)) {
        spapr_has_graphics = true;
    }

    if (rma_size < (MIN_RMA_SLOF << 20)) {
        /* MIN_RMA_SLOF is an unsigned long constant, hence %lu */
        fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                "%luM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
        exit(1);
    }

    fprintf(stderr, "sPAPR memory map:\n");
    fprintf(stderr, "RTAS                 : 0x%08lx..%08lx\n",
            (unsigned long)spapr->rtas_addr,
            (unsigned long)(spapr->rtas_addr + spapr->rtas_size - 1));
    fprintf(stderr, "FDT                  : 0x%08lx..%08lx\n",
            (unsigned long)spapr->fdt_addr,
            (unsigned long)(spapr->fdt_addr + FDT_MAX_SIZE - 1));

    if (kernel_filename) {
        uint64_t lowaddr = 0;

        /* Try ELF first, then fall back to a raw image at KERNEL_LOAD_ADDR */
        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                               NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
        if (kernel_size < 0) {
            kernel_size = load_image_targphys(kernel_filename,
                                              KERNEL_LOAD_ADDR,
                                              load_limit - KERNEL_LOAD_ADDR);
        }
        if (kernel_size < 0) {
            fprintf(stderr, "qemu: could not load kernel '%s'\n",
                    kernel_filename);
            exit(1);
        }
        fprintf(stderr, "Kernel               : 0x%08x..%08lx\n",
                KERNEL_LOAD_ADDR, KERNEL_LOAD_ADDR + kernel_size - 1);

        /* load initrd */
        if (initrd_filename) {
            /* Try to locate the initrd in the gap between the kernel
             * and the firmware. Add a bit of space just in case
             */
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
                                              load_limit - initrd_base);
            if (initrd_size < 0) {
                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
                        initrd_filename);
                exit(1);
            }
            fprintf(stderr, "Ramdisk              : 0x%08lx..%08lx\n",
                    (long)initrd_base, (long)(initrd_base + initrd_size - 1));
        } else {
            initrd_base = 0;
            initrd_size = 0;
        }
    }

    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME);
    if (filename == NULL) {
        hw_error("qemu: could not find LPAR firmware '%s'\n", FW_FILE_NAME);
        exit(1);
    }
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
    if (fw_size < 0) {
        /* This is the firmware image, not RTAS */
        hw_error("qemu: could not load LPAR firmware '%s'\n", filename);
        exit(1);
    }
    g_free(filename);
    fprintf(stderr, "Firmware load        : 0x%08x..%08lx\n",
            0, fw_size);
    fprintf(stderr, "Firmware runtime     : 0x%08lx..%08lx\n",
            load_limit, (unsigned long)spapr->fdt_addr);

    spapr->entry_point = 0x100;

    /* SLOF will startup the secondary CPUs using RTAS */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        env->halted = 1;
    }

    /* Prepare the device tree */
    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
                                            initrd_base, initrd_size,
                                            kernel_size,
                                            boot_device, kernel_cmdline,
                                            pteg_shift + 7);
    assert(spapr->fdt_skel != NULL);

    qemu_register_reset(spapr_reset, spapr);
}
819

    
820
static QEMUMachine spapr_machine = {
821
    .name = "pseries",
822
    .desc = "pSeries Logical Partition (PAPR compliant)",
823
    .init = ppc_spapr_init,
824
    .max_cpus = MAX_CPUS,
825
    .no_parallel = 1,
826
    .use_scsi = 1,
827
};
828

    
829
static void spapr_machine_init(void)
830
{
831
    qemu_register_machine(&spapr_machine);
832
}
833

    
834
machine_init(spapr_machine_init);