/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "hw/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"

#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"

//#define DEBUG_UNASSIGNED
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;
DMAContext dma_context_memory;

MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
static MemoryRegion io_mem_subpage_ram;

#endif

CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount = 0;

#if !defined(CONFIG_USER_ONLY)

static MemoryRegionSection *phys_sections;
static unsigned phys_sections_nb, phys_sections_nb_alloc;
static uint16_t phys_section_unassigned;
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

static void io_mem_init(void);
static void memory_map_init(void);
static void *qemu_safe_ram_ptr(ram_addr_t addr);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(unsigned nodes)
{
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
        typedef PhysPageEntry Node[L2_SIZE];
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
                                      phys_map_nodes_nb + nodes);
        phys_map_nodes = g_renew(Node, phys_map_nodes,
                                 phys_map_nodes_nb_alloc);
    }
}

static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = phys_map_nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != phys_map_nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        phys_map_nodes[ret][i].is_leaf = 0;
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}


static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
{
    PhysPageEntry lp = d->phys_map;
    PhysPageEntry *p;
    int i;
    uint16_t s_index = phys_section_unassigned;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            goto not_found;
        }
        p = phys_map_nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }

    s_index = lp.ptr;
not_found:
    return &phys_sections[s_index];
}
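
/* Note on the structures above (summary added for readability, not from the
 * original file): phys_page_set_level()/phys_page_find() implement a small
 * radix tree over physical page numbers.  Each level consumes L2_BITS of the
 * page index, inner nodes live in the phys_map_nodes[] pool and are addressed
 * by uint16_t indices, and leaves hold an index into phys_sections[].
 * Unmapped ranges fall through to phys_section_unassigned.  Illustration
 * (values assumed, e.g. L2_BITS == 10): looking up page index 0x12345 uses
 * (0x12345 >> 10) & 0x3ff at the upper level and 0x12345 & 0x3ff at the leaf
 * level. */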

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_ram && mr != &io_mem_rom
        && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}
#endif

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}

#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}

static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
#endif

CPUArchState *qemu_get_cpu(int cpu)
{
    CPUArchState *env = first_cpu;

    while (env) {
        if (env->cpu_index == cpu)
            break;
        env = env->next_cpu;
    }

    return env;
}

void cpu_exec_init(CPUArchState *env)
{
#ifndef CONFIG_USER_ONLY
    CPUState *cpu = ENV_GET_CPU(env);
#endif
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}
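
/* Summary (added comment): cpu_exec_init() above appends the new CPUArchState
 * to the singly linked first_cpu list and derives cpu_index from its position
 * in that list; in system-mode builds it also registers the shared
 * "cpu_common" VMState fields and the per-target "cpu" savevm section so the
 * CPU state is included in snapshots and migration. */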

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}
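
/* Illustrative usage (added comment, not part of the original file): a
 * debugger front end that wants to trap 4-byte writes would call
 *
 *     CPUWatchpoint *wp;
 *     cpu_watchpoint_insert(env, addr, 4, BP_GDB | BP_MEM_WRITE, &wp);
 *
 * The length must be a power of two and addr must be aligned to it, or the
 * sanity check above rejects the request with -EINVAL. */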

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);

    breakpoint_invalidate(env, pc);

    if (breakpoint)
        *breakpoint = bp;
    return 0;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(env, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(env, breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUArchState *env, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (env->singlestep_enabled != enabled) {
        env->singlestep_enabled = enabled;
        if (kvm_enabled())
            kvm_update_guest_debug(env, 0);
        else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(env);
        }
    }
#endif
}

void cpu_reset_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request &= ~mask;
}

void cpu_exit(CPUArchState *env)
{
    env->exit_request = 1;
    cpu_unlink_tb(env);
}

void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

CPUArchState *cpu_copy(CPUArchState *env)
{
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
    CPUArchState *next_cpu = new_env->next_cpu;
    int cpu_index = new_env->cpu_index;
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;
    CPUWatchpoint *wp;
#endif

    memcpy(new_env, env, sizeof(CPUArchState));

    /* Preserve chaining and index. */
    new_env->next_cpu = next_cpu;
    new_env->cpu_index = cpu_index;

    /* Clone all break/watchpoints.
       Note: Once we support ptrace with hw-debug register access, make sure
       BP_CPU break/watchpoints are handled correctly on clone. */
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#if defined(TARGET_HAS_ICE)
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
    }
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
                              wp->flags, NULL);
    }
#endif

    return new_env;
}

#if !defined(CONFIG_USER_ONLY)
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                      uintptr_t length)
{
    uintptr_t start1;

    /* we modify the TLB cache so that the dirty bit will be set again
       when accessing the range */
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
    /* Check that we don't span multiple blocks - this breaks the
       address comparisons below.  */
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
            != (end - 1) - start) {
        abort();
    }
    cpu_tlb_reset_dirty_all(start1, length);

}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                     int dirty_flags)
{
    uintptr_t length;

    start &= TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    length = end - start;
    if (length == 0)
        return;
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, end, length);
    }
}

static int cpu_physical_memory_set_dirty_tracking(int enable)
{
    int ret = 0;
    in_migration = enable;
    return ret;
}

hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                                   MemoryRegionSection *section,
                                                   target_ulong vaddr,
                                                   hwaddr paddr,
                                                   int prot,
                                                   target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, paddr);
        if (!section->readonly) {
            iotlb |= phys_section_notdirty;
        } else {
            iotlb |= phys_section_rom;
        }
    } else {
        /* IO handlers are currently passed a physical address.
           It would be nice to pass an offset from the base address
           of that region.  This would avoid having to special case RAM,
           and avoid full address decoding in every device.
           We can't use the high bits of pd for this because
           IO_MEM_ROMD uses these as a ram address.  */
        iotlb = section - phys_sections;
        iotlb += memory_region_section_addr(section, paddr);
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = phys_section_watch + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
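
/* Note on the encoding above (added comment): the returned iotlb value is
 * overloaded.  For RAM-backed sections it is the ram address of the page
 * OR'ed with a small special section index (phys_section_notdirty or
 * phys_section_rom) in the low bits; for MMIO it is the section's index into
 * phys_sections[] plus the offset within the page.  iotlb_to_region() later
 * recovers the section from the low bits, and pages carrying an active
 * watchpoint are redirected to phys_section_watch so every access traps
 * through io_mem_watch. */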
#endif /* !defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(hwaddr base);
static void destroy_page_desc(uint16_t section_index)
{
    MemoryRegionSection *section = &phys_sections[section_index];
    MemoryRegion *mr = section->mr;

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}

static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
{
    unsigned i;
    PhysPageEntry *p;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = phys_map_nodes[lp->ptr];
    for (i = 0; i < L2_SIZE; ++i) {
        if (!p[i].is_leaf) {
            destroy_l2_mapping(&p[i], level - 1);
        } else {
            destroy_page_desc(p[i].ptr);
        }
    }
    lp->is_leaf = 0;
    lp->ptr = PHYS_MAP_NODE_NIL;
}

static void destroy_all_mappings(AddressSpaceDispatch *d)
{
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    phys_map_nodes_reset();
}

static uint16_t phys_section_add(MemoryRegionSection *section)
{
    if (phys_sections_nb == phys_sections_nb_alloc) {
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
                                phys_sections_nb_alloc);
    }
    phys_sections[phys_sections_nb] = *section;
    return phys_sections_nb++;
}

static void phys_sections_clear(void)
{
    phys_sections_nb = 0;
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = TARGET_PAGE_SIZE,
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(base);
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + section->size - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
}


static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = section->size;
    hwaddr addr;
    uint16_t section_index = phys_section_add(section);

    assert(size);

    addr = start_addr;
    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
                  section_index);
}

static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
    MemoryRegionSection now = *section, remain = *section;

    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
        || (now.size < TARGET_PAGE_SIZE)) {
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space,
                       now.size);
        register_subpage(d, &now);
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    while (remain.size >= TARGET_PAGE_SIZE) {
        now = remain;
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
            now.size = TARGET_PAGE_SIZE;
            register_subpage(d, &now);
        } else {
            now.size &= TARGET_PAGE_MASK;
            register_multipage(d, &now);
        }
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    now = remain;
    if (now.size) {
        register_subpage(d, &now);
    }
}
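
/* Summary (added comment): mem_add() above flattens a MemoryRegionSection
 * into the dispatch page table in up to three pieces.  An unaligned head and
 * tail are routed through register_subpage(), which dispatches at sub-page
 * granularity, while the page-aligned middle is mapped in one go by
 * register_multipage(). */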

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

#if defined(__linux__) && !defined(TARGET_S390X)

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    void *area;
    int fd;
#ifdef MAP_POPULATE
    int flags;
#endif
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
        return NULL;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        free(filename);
        return NULL;
    }
    unlink(filename);
    free(filename);

    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

#ifdef MAP_POPULATE
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
#else
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#endif
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return (NULL);
    }
    block->fd = fd;
    return area;
}
#endif

static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    if (QTAILQ_EMPTY(&ram_list.blocks))
        return 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}
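
/* Summary (added comment): find_ram_offset() above is a best-fit allocator
 * over ram_addr_t space.  For each existing block it measures the gap up to
 * the closest block that starts after it, and picks the smallest gap that
 * still fits the request.  Illustrative example (values assumed): with blocks
 * at [0, 0x8000000) and [0x10000000, 0x18000000), a 0x4000000 request is
 * placed at offset 0x8000000, the tighter of the two usable gaps. */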

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
        last = MAX(last, block->offset + block->length);

    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;
    QemuOpts *machine_opts;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
    if (machine_opts &&
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    new_block = NULL;
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            new_block = block;
            break;
        }
    }
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    QemuOpts *opts;

    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *block, *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));

    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else {
        if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
            new_block->host = file_ram_alloc(new_block, size, mem_path);
            if (!new_block->host) {
                new_block->host = qemu_vmalloc(size);
                memory_try_enable_merging(new_block->host, size);
            }
#else
            fprintf(stderr, "-mem-path option unsupported\n");
            exit(1);
#endif
        } else {
            if (xen_enabled()) {
                xen_ram_alloc(new_block->offset, size, mr);
            } else if (kvm_enabled()) {
                /* some s390/kvm configurations have special constraints */
                new_block->host = kvm_vmalloc(size);
            } else {
                new_block->host = qemu_vmalloc(size);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                       last_ram_offset() >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
           0, size >> TARGET_PAGE_BITS);
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);

    qemu_ram_setup_dump(new_block->host, size);
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}
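
/* Summary (added comment): qemu_ram_alloc_from_ptr() above picks the backing
 * storage in this order: a caller-provided host pointer (RAM_PREALLOC_MASK),
 * a hugetlbfs file when -mem-path is given, the Xen allocator, kvm_vmalloc()
 * where KVM imposes constraints, or plain qemu_vmalloc().  It then grows the
 * dirty bitmap, marks the new range fully dirty (0xff), and applies the
 * madvise-based dump/merge/hugepage hints. */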

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            g_free(block);
            return;
        }
    }
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
                if (block->fd) {
                    munmap(block->host, block->length);
                    close(block->fd);
                } else {
                    qemu_vfree(block->host);
                }
#else
                abort();
#endif
            } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                munmap(block->host, block->length);
#else
                if (xen_enabled()) {
                    xen_invalidate_map_cache_entry(block->host);
                } else {
                    qemu_vfree(block->host);
                }
#endif
            }
            g_free(block);
            return;
        }
    }

}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (mem_path) {
#if defined(__linux__) && !defined(TARGET_S390X)
                    if (block->fd) {
#ifdef MAP_POPULATE
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                            MAP_PRIVATE;
#else
                        flags |= MAP_PRIVATE;
#endif
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, block->fd, offset);
                    } else {
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, -1, 0);
                    }
#else
                    abort();
#endif
                } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
                                flags, -1, 0);
#else
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
#endif
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    block = ram_list.mru_block;
    if (block && addr - block->offset < block->length) {
        goto found;
    }
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    ram_list.mru_block = block;
    if (xen_enabled()) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        } else if (block->host == NULL) {
            block->host =
                xen_map_cache(block->offset, block->length, 1);
        }
    }
    return block->host + (addr - block->offset);
}
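
/* Summary (added comment): qemu_get_ram_ptr() above consults
 * ram_list.mru_block as a one-entry cache before scanning the block list;
 * under Xen the host mapping may be created lazily through xen_map_cache().
 * qemu_safe_ram_ptr() below is the variant that deliberately leaves the MRU
 * cache untouched. */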

/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
 *
 * ??? Is this still necessary?
 */
static void *qemu_safe_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument */
static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
{
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;

        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->length) {
                if (addr - block->offset + *size > block->length)
                    *size = block->length - addr + block->offset;
                return block->host + (addr - block->offset);
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}

int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return 0;
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            *ram_addr = block->offset + (host - block->host);
            return 0;
        }
    }

    return -1;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
        abort();
    }
    return ram_addr;
}

static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
                                    unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
#endif
    return 0;
}

static void unassigned_mem_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
#endif
}

static const MemoryRegionOps unassigned_mem_ops = {
    .read = unassigned_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t error_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    abort();
}

static void error_mem_write(void *opaque, hwaddr addr,
                            uint64_t value, unsigned size)
{
    abort();
}

static const MemoryRegionOps error_mem_ops = {
    .read = error_mem_read,
    .write = error_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static const MemoryRegionOps rom_mem_ops = {
    .read = error_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    int dirty_flags;
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
        tb_invalidate_phys_page_fast(ram_addr, size);
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
#endif
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (dirty_flags == 0xff)
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
}
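
/* Summary (added comment): notdirty_mem_write() above is the slow path taken
 * for pages whose CODE_DIRTY_FLAG is clear.  It invalidates any translated
 * code on the page, performs the store, and re-marks the page dirty; once all
 * dirty bits are set again (0xff) the TLB entry is switched back to a direct
 * RAM mapping, so subsequent writes no longer trap here. */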

static const MemoryRegionOps notdirty_mem_ops = {
    .read = error_mem_read,
    .write = notdirty_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUArchState *env = cpu_single_env;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb_check_watchpoint(env);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(addr);
    case 2: return lduw_phys(addr);
    case 4: return ldl_phys(addr);
    default: abort();
    }
}

static void watch_mem_write(void *opaque, hwaddr addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(addr, val);
        break;
    case 2:
        stw_phys(addr, val);
        break;
    case 4:
        stl_phys(addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
{
    subpage_t *mmio = opaque;
    unsigned int idx = SUBPAGE_IDX(addr);
    MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
           mmio, len, addr, idx);
#endif

    section = &phys_sections[mmio->sub_section[idx]];
    addr += mmio->base;
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    return io_mem_read(section->mr, addr, len);
}

static void subpage_write(void *opaque, hwaddr addr,
                          uint64_t value, unsigned len)
{
    subpage_t *mmio = opaque;
    unsigned int idx = SUBPAGE_IDX(addr);
    MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
           " idx %d value %"PRIx64"\n",
           __func__, mmio, len, addr, idx, value);
#endif

    section = &phys_sections[mmio->sub_section[idx]];
    addr += mmio->base;
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    io_mem_write(section->mr, addr, value, len);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
                                 unsigned size)
{
    ram_addr_t raddr = addr;
    void *ptr = qemu_get_ram_ptr(raddr);
    switch (size) {
    case 1: return ldub_p(ptr);
    case 2: return lduw_p(ptr);
    case 4: return ldl_p(ptr);
    default: abort();
    }
}

static void subpage_ram_write(void *opaque, hwaddr addr,
                              uint64_t value, unsigned size)
{
    ram_addr_t raddr = addr;
    void *ptr = qemu_get_ram_ptr(raddr);
    switch (size) {
    case 1: return stb_p(ptr, value);
    case 2: return stw_p(ptr, value);
    case 4: return stl_p(ptr, value);
    default: abort();
    }
}

static const MemoryRegionOps subpage_ram_ops = {
    .read = subpage_ram_read,
    .write = subpage_ram_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    /* note: the original debug printf referenced an undefined "memory"
       variable; print the section index instead so this builds */
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    if (memory_region_is_ram(phys_sections[section].mr)) {
        MemoryRegionSection new_section = phys_sections[section];
        new_section.mr = &io_mem_subpage_ram;
        section = phys_section_add(&new_section);
    }
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->base = base;
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    /* note: the original debug printf referenced an undefined
       "subpage_memory" variable; it has been dropped here */
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);

    return mmio;
}
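
/* Summary (added comment): the subpage machinery above handles guest pages
 * that are covered by more than one memory region.  subpage_t keeps a
 * per-byte table of section indices, subpage_read()/subpage_write() forward
 * each access to the section that owns that offset, and subpage_register()
 * fills a byte range with a section index; RAM-backed sections are rerouted
 * through io_mem_subpage_ram so plain loads and stores still hit host memory
 * directly. */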
1623

    
1624
static uint16_t dummy_section(MemoryRegion *mr)
1625
{
1626
    MemoryRegionSection section = {
1627
        .mr = mr,
1628
        .offset_within_address_space = 0,
1629
        .offset_within_region = 0,
1630
        .size = UINT64_MAX,
1631
    };
1632

    
1633
    return phys_section_add(&section);
1634
}
1635

    
1636
MemoryRegion *iotlb_to_region(hwaddr index)
1637
{
1638
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1639
}
1640

    
1641
static void io_mem_init(void)
1642
{
1643
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1644
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1645
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1646
                          "unassigned", UINT64_MAX);
1647
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1648
                          "notdirty", UINT64_MAX);
1649
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1650
                          "subpage-ram", UINT64_MAX);
1651
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1652
                          "watch", UINT64_MAX);
1653
}
1654

    
1655
static void mem_begin(MemoryListener *listener)
1656
{
1657
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1658

    
1659
    destroy_all_mappings(d);
1660
    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1661
}
1662

    
1663
static void core_begin(MemoryListener *listener)
{
    phys_sections_clear();
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
    phys_section_rom = dummy_section(&io_mem_rom);
    phys_section_watch = dummy_section(&io_mem_watch);
}

static void tcg_commit(MemoryListener *listener)
{
    CPUArchState *env;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener io_memory_listener = {
    .region_add = io_region_add,
    .region_del = io_region_del,
    .priority = 0,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

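/* Attach a dispatch structure to @as and register its listener so that the
 * physical page map is rebuilt whenever the address space topology changes.
 */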
void address_space_init_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
    d->listener = (MemoryListener) {
        .begin = mem_begin,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    as->dispatch = d;
    memory_listener_register(&d->listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&d->listener);
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    g_free(d);
    as->dispatch = NULL;
}

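/* Create the global "system" and "io" address spaces, hook up the core, I/O
 * and TCG listeners, and set up the default DMA context.
 */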
static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);

    dma_context_init(&dma_context_memory, &address_space_memory,
                     NULL, NULL, NULL);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

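/* After a write to RAM: invalidate any translated code covering the range,
 * set the dirty flags (except CODE_DIRTY_FLAG) and let Xen know that the
 * range was modified.
 */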
static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

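/* Slow-path copy between a caller buffer and an address space.  The loop
 * splits the transfer at page boundaries; MMIO sections go through
 * io_mem_read()/io_mem_write() in at most 4-byte pieces, RAM sections are
 * accessed with memcpy() and marked dirty on writes.
 */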
void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    AddressSpaceDispatch *d = as->dispatch;
    int l;
    uint8_t *ptr;
    uint32_t val;
    hwaddr page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(d, page >> TARGET_PAGE_BITS);

        if (is_write) {
            if (!memory_region_is_ram(section->mr)) {
                hwaddr addr1;
                addr1 = memory_region_section_addr(section, addr);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                    l = 2;
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                    l = 1;
                }
            } else if (!section->readonly) {
                ram_addr_t addr1;
                addr1 = memory_region_get_ram_addr(section->mr)
                    + memory_region_section_addr(section, addr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
                qemu_put_ram_ptr(ptr);
            }
        } else {
            if (!(memory_region_is_ram(section->mr) ||
                  memory_region_is_romd(section->mr))) {
                hwaddr addr1;
                /* I/O case */
                addr1 = memory_region_section_addr(section, addr);
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                    l = 2;
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                    l = 1;
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
                                       + memory_region_section_addr(section,
                                                                    addr));
                memcpy(buf, ptr, l);
                qemu_put_ram_ptr(ptr);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

void address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

/**
 * address_space_read: read from an address space.
 *
 * @as: #AddressSpace to be accessed
 * @addr: address within that address space
 * @buf: buffer with the data transferred
 * @len: length of the transfer in bytes
 */
void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    address_space_rw(as, addr, buf, len, false);
}


void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    return address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    AddressSpaceDispatch *d = address_space_memory.dispatch;
    int l;
    uint8_t *ptr;
    hwaddr page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(d, page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr))) {
            /* do nothing */
        } else {
            unsigned long addr1;
            addr1 = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
            qemu_put_ram_ptr(ptr);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

typedef struct {
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

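/* Callers that fail to map memory (because the bounce buffer is in use) can
 * register a callback here; it is invoked, and the client removed, once the
 * buffer is released by address_space_unmap().
 */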
void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    AddressSpaceDispatch *d = as->dispatch;
    hwaddr len = *plen;
    hwaddr todo = 0;
    int l;
    hwaddr page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(d, page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                address_space_read(as, addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

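/* Illustrative sketch (not part of the original call sites): how a
 * DMA-capable caller is expected to pair the map/unmap helpers above.
 * "desc_pa", "size", "opaque" and "retry_cb" are hypothetical names.
 *
 *     hwaddr mapped = size;
 *     void *host = cpu_physical_memory_map(desc_pa, &mapped, 1);
 *     if (!host) {
 *         cpu_register_map_client(opaque, retry_cb);    (retry later)
 *     } else {
 *         memset(host, 0, mapped);                      (direct host access)
 *         cpu_physical_memory_unmap(host, mapped, 1, mapped);
 *     }
 */
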
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
                               & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

void stq_phys_notdirty(hwaddr addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

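/* Illustrative sketch (not part of the original code): a device with
 * little-endian guest-visible structures would use the fixed-endian
 * helpers above regardless of host or target byte order, e.g.
 *
 *     stl_le_phys(desc_pa + 8, len);            (store a 32-bit LE field)
 *     uint32_t flags = ldl_le_phys(desc_pa + 12);
 *
 * "desc_pa", "len" and "flags" are hypothetical names.
 */
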
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
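/* Return true if @phys_addr is backed by neither RAM nor a ROM device,
 * i.e. an access there would go through MMIO dispatch.
 */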
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch,
                             phys_addr >> TARGET_PAGE_BITS);

    return !(memory_region_is_ram(section->mr) ||
             memory_region_is_romd(section->mr));
}
#endif