root / exec.c @ 51644ab7
1
/*
2
 *  Virtual page mapping
3
 *
4
 *  Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include "config.h"
20
#ifdef _WIN32
21
#include <windows.h>
22
#else
23
#include <sys/types.h>
24
#include <sys/mman.h>
25
#endif
26

    
27
#include "qemu-common.h"
28
#include "cpu.h"
29
#include "tcg.h"
30
#include "hw/hw.h"
31
#include "hw/qdev.h"
32
#include "qemu/osdep.h"
33
#include "sysemu/kvm.h"
34
#include "hw/xen/xen.h"
35
#include "qemu/timer.h"
36
#include "qemu/config-file.h"
37
#include "exec/memory.h"
38
#include "sysemu/dma.h"
39
#include "exec/address-spaces.h"
40
#if defined(CONFIG_USER_ONLY)
41
#include <qemu.h>
42
#else /* !CONFIG_USER_ONLY */
43
#include "sysemu/xen-mapcache.h"
44
#include "trace.h"
45
#endif
46
#include "exec/cpu-all.h"
47

    
48
#include "exec/cputlb.h"
49
#include "translate-all.h"
50

    
51
#include "exec/memory-internal.h"
52

    
53
//#define DEBUG_SUBPAGE
54

    
55
#if !defined(CONFIG_USER_ONLY)
56
int phys_ram_fd;
57
static int in_migration;
58

    
59
RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60

    
61
static MemoryRegion *system_memory;
62
static MemoryRegion *system_io;
63

    
64
AddressSpace address_space_io;
65
AddressSpace address_space_memory;
66
DMAContext dma_context_memory;
67

    
68
MemoryRegion io_mem_rom, io_mem_notdirty;
69
static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
70

    
71
#endif
72

    
73
CPUArchState *first_cpu;
74
/* current CPU in the current thread. It is only valid inside
75
   cpu_exec() */
76
DEFINE_TLS(CPUArchState *,cpu_single_env);
77
/* 0 = Do not count executed instructions.
78
   1 = Precise instruction counting.
79
   2 = Adaptive rate instruction counting.  */
80
int use_icount;
81

    
82
#if !defined(CONFIG_USER_ONLY)
83

    
84
static MemoryRegionSection *phys_sections;
85
static unsigned phys_sections_nb, phys_sections_nb_alloc;
86
static uint16_t phys_section_unassigned;
87
static uint16_t phys_section_notdirty;
88
static uint16_t phys_section_rom;
89
static uint16_t phys_section_watch;
90

    
91
/* Simple allocator for PhysPageEntry nodes */
92
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
93
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
94

    
95
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
96

    
97
static void io_mem_init(void);
98
static void memory_map_init(void);
99
static void *qemu_safe_ram_ptr(ram_addr_t addr);
100

    
101
static MemoryRegion io_mem_watch;
102
#endif
103

    
104
#if !defined(CONFIG_USER_ONLY)
105

    
106
static void phys_map_node_reserve(unsigned nodes)
107
{
108
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
109
        typedef PhysPageEntry Node[L2_SIZE];
110
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
111
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
112
                                      phys_map_nodes_nb + nodes);
113
        phys_map_nodes = g_renew(Node, phys_map_nodes,
114
                                 phys_map_nodes_nb_alloc);
115
    }
116
}
117

    
118
static uint16_t phys_map_node_alloc(void)
119
{
120
    unsigned i;
121
    uint16_t ret;
122

    
123
    ret = phys_map_nodes_nb++;
124
    assert(ret != PHYS_MAP_NODE_NIL);
125
    assert(ret != phys_map_nodes_nb_alloc);
126
    for (i = 0; i < L2_SIZE; ++i) {
127
        phys_map_nodes[ret][i].is_leaf = 0;
128
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
129
    }
130
    return ret;
131
}
132

    
133
static void phys_map_nodes_reset(void)
134
{
135
    phys_map_nodes_nb = 0;
136
}
137

    
138

    
139
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
140
                                hwaddr *nb, uint16_t leaf,
141
                                int level)
142
{
143
    PhysPageEntry *p;
144
    int i;
145
    hwaddr step = (hwaddr)1 << (level * L2_BITS);
146

    
147
    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
148
        lp->ptr = phys_map_node_alloc();
149
        p = phys_map_nodes[lp->ptr];
150
        if (level == 0) {
151
            for (i = 0; i < L2_SIZE; i++) {
152
                p[i].is_leaf = 1;
153
                p[i].ptr = phys_section_unassigned;
154
            }
155
        }
156
    } else {
157
        p = phys_map_nodes[lp->ptr];
158
    }
159
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
160

    
161
    while (*nb && lp < &p[L2_SIZE]) {
162
        if ((*index & (step - 1)) == 0 && *nb >= step) {
163
            lp->is_leaf = true;
164
            lp->ptr = leaf;
165
            *index += step;
166
            *nb -= step;
167
        } else {
168
            phys_page_set_level(lp, index, nb, leaf, level - 1);
169
        }
170
        ++lp;
171
    }
172
}
173

    
174
static void phys_page_set(AddressSpaceDispatch *d,
175
                          hwaddr index, hwaddr nb,
176
                          uint16_t leaf)
177
{
178
    /* Wildly overreserve - it doesn't matter much. */
179
    phys_map_node_reserve(3 * P_L2_LEVELS);
180

    
181
    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
182
}
183

    
184
static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
185
{
186
    PhysPageEntry lp = d->phys_map;
187
    PhysPageEntry *p;
188
    int i;
189

    
190
    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
191
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
192
            return &phys_sections[phys_section_unassigned];
193
        }
194
        p = phys_map_nodes[lp.ptr];
195
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
196
    }
197
    return &phys_sections[lp.ptr];
198
}
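/* Illustrative sketch (not part of the original file): how the radix map
 * above is typically exercised.  The helper name and its arguments are
 * hypothetical; it assumes "d" is a live dispatch table and "section_index"
 * was returned by phys_section_add(). */
static inline void example_map_and_lookup(AddressSpaceDispatch *d,
                                          uint16_t section_index)
{
    hwaddr page = 0x100000 >> TARGET_PAGE_BITS;   /* first page to map */
    hwaddr npages = 4;                            /* map four pages    */
    MemoryRegionSection *found;

    /* Populate the tree: every leaf in [page, page + npages) now points
     * at section_index. */
    phys_page_set(d, page, npages, section_index);

    /* Walking back down the tree returns the same section. */
    found = phys_page_find(d, page + 2);
    assert(found == &phys_sections[section_index]);
    (void)found;   /* silence -Wunused when asserts are compiled out */
}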
199

    
200
bool memory_region_is_unassigned(MemoryRegion *mr)
201
{
202
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
203
        && mr != &io_mem_watch;
204
}
205

    
206
MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
207
                                             hwaddr *xlat, hwaddr *plen,
208
                                             bool is_write)
209
{
210
    MemoryRegionSection *section;
211
    Int128 diff;
212

    
213
    section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
214
    /* Compute offset within MemoryRegionSection */
215
    addr -= section->offset_within_address_space;
216

    
217
    /* Compute offset within MemoryRegion */
218
    *xlat = addr + section->offset_within_region;
219

    
220
    diff = int128_sub(section->mr->size, int128_make64(addr));
221
    *plen = MIN(int128_get64(diff), *plen);
222
    return section;
223
}
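/* Illustrative sketch (not part of the original file): the loop shape a
 * caller such as address_space_rw() builds around address_space_translate().
 * The helper is hypothetical and the buffer handling is omitted; only the
 * length clamping is shown. */
static inline void example_translate_loop(AddressSpace *as, hwaddr addr,
                                          hwaddr len)
{
    while (len > 0) {
        hwaddr xlat, l = len;
        MemoryRegionSection *section;

        /* On return, l has been clamped so that [xlat, xlat + l) fits
         * entirely inside section->mr; a real caller would now access
         * the region with that offset and length. */
        section = address_space_translate(as, addr, &xlat, &l, false);
        (void)section;
        (void)xlat;

        addr += l;
        len -= l;
    }
}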
224
#endif
225

    
226
void cpu_exec_init_all(void)
227
{
228
#if !defined(CONFIG_USER_ONLY)
229
    qemu_mutex_init(&ram_list.mutex);
230
    memory_map_init();
231
    io_mem_init();
232
#endif
233
}
234

    
235
#if !defined(CONFIG_USER_ONLY)
236

    
237
static int cpu_common_post_load(void *opaque, int version_id)
238
{
239
    CPUState *cpu = opaque;
240

    
241
    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
242
       version_id is increased. */
243
    cpu->interrupt_request &= ~0x01;
244
    tlb_flush(cpu->env_ptr, 1);
245

    
246
    return 0;
247
}
248

    
249
static const VMStateDescription vmstate_cpu_common = {
250
    .name = "cpu_common",
251
    .version_id = 1,
252
    .minimum_version_id = 1,
253
    .minimum_version_id_old = 1,
254
    .post_load = cpu_common_post_load,
255
    .fields      = (VMStateField []) {
256
        VMSTATE_UINT32(halted, CPUState),
257
        VMSTATE_UINT32(interrupt_request, CPUState),
258
        VMSTATE_END_OF_LIST()
259
    }
260
};
261
#else
262
#define vmstate_cpu_common vmstate_dummy
263
#endif
264

    
265
CPUState *qemu_get_cpu(int index)
266
{
267
    CPUArchState *env = first_cpu;
268
    CPUState *cpu = NULL;
269

    
270
    while (env) {
271
        cpu = ENV_GET_CPU(env);
272
        if (cpu->cpu_index == index) {
273
            break;
274
        }
275
        env = env->next_cpu;
276
    }
277

    
278
    return env ? cpu : NULL;
279
}
280

    
281
void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
282
{
283
    CPUArchState *env = first_cpu;
284

    
285
    while (env) {
286
        func(ENV_GET_CPU(env), data);
287
        env = env->next_cpu;
288
    }
289
}
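/* Illustrative sketch (not part of the original file): a hypothetical
 * qemu_for_each_cpu() client that just counts the CPUs on the list. */
static void example_count_cpu(CPUState *cpu, void *data)
{
    (void)cpu;
    (*(int *)data)++;
}

static inline int example_cpu_count(void)
{
    int n = 0;

    qemu_for_each_cpu(example_count_cpu, &n);
    return n;
}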
290

    
291
void cpu_exec_init(CPUArchState *env)
292
{
293
    CPUState *cpu = ENV_GET_CPU(env);
294
    CPUClass *cc = CPU_GET_CLASS(cpu);
295
    CPUArchState **penv;
296
    int cpu_index;
297

    
298
#if defined(CONFIG_USER_ONLY)
299
    cpu_list_lock();
300
#endif
301
    env->next_cpu = NULL;
302
    penv = &first_cpu;
303
    cpu_index = 0;
304
    while (*penv != NULL) {
305
        penv = &(*penv)->next_cpu;
306
        cpu_index++;
307
    }
308
    cpu->cpu_index = cpu_index;
309
    cpu->numa_node = 0;
310
    QTAILQ_INIT(&env->breakpoints);
311
    QTAILQ_INIT(&env->watchpoints);
312
#ifndef CONFIG_USER_ONLY
313
    cpu->thread_id = qemu_get_thread_id();
314
#endif
315
    *penv = env;
316
#if defined(CONFIG_USER_ONLY)
317
    cpu_list_unlock();
318
#endif
319
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
320
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
321
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
322
                    cpu_save, cpu_load, env);
323
    assert(cc->vmsd == NULL);
324
#endif
325
    if (cc->vmsd != NULL) {
326
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
327
    }
328
}
329

    
330
#if defined(TARGET_HAS_ICE)
331
#if defined(CONFIG_USER_ONLY)
332
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
333
{
334
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
335
}
336
#else
337
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
338
{
339
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
340
            (pc & ~TARGET_PAGE_MASK));
341
}
342
#endif
343
#endif /* TARGET_HAS_ICE */
344

    
345
#if defined(CONFIG_USER_ONLY)
346
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
347

    
348
{
349
}
350

    
351
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
352
                          int flags, CPUWatchpoint **watchpoint)
353
{
354
    return -ENOSYS;
355
}
356
#else
357
/* Add a watchpoint.  */
358
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
359
                          int flags, CPUWatchpoint **watchpoint)
360
{
361
    target_ulong len_mask = ~(len - 1);
362
    CPUWatchpoint *wp;
363

    
364
    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
365
    if ((len & (len - 1)) || (addr & ~len_mask) ||
366
            len == 0 || len > TARGET_PAGE_SIZE) {
367
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
368
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
369
        return -EINVAL;
370
    }
371
    wp = g_malloc(sizeof(*wp));
372

    
373
    wp->vaddr = addr;
374
    wp->len_mask = len_mask;
375
    wp->flags = flags;
376

    
377
    /* keep all GDB-injected watchpoints in front */
378
    if (flags & BP_GDB)
379
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
380
    else
381
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
382

    
383
    tlb_flush_page(env, addr);
384

    
385
    if (watchpoint)
386
        *watchpoint = wp;
387
    return 0;
388
}
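/* Illustrative sketch (not part of the original file): setting and tearing
 * down a 4-byte write watchpoint with a hypothetical helper.  The address
 * is aligned down to the power-of-two length, matching the sanity check
 * above. */
static inline int example_watch_word(CPUArchState *env, target_ulong addr)
{
    CPUWatchpoint *wp;
    int ret;

    ret = cpu_watchpoint_insert(env, addr & ~(target_ulong)3, 4,
                                BP_MEM_WRITE, &wp);
    if (ret == 0) {
        /* ... run the guest; a hit sets BP_WATCHPOINT_HIT in wp->flags ... */
        cpu_watchpoint_remove_by_ref(env, wp);
    }
    return ret;
}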
389

    
390
/* Remove a specific watchpoint.  */
391
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
392
                          int flags)
393
{
394
    target_ulong len_mask = ~(len - 1);
395
    CPUWatchpoint *wp;
396

    
397
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
398
        if (addr == wp->vaddr && len_mask == wp->len_mask
399
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
400
            cpu_watchpoint_remove_by_ref(env, wp);
401
            return 0;
402
        }
403
    }
404
    return -ENOENT;
405
}
406

    
407
/* Remove a specific watchpoint by reference.  */
408
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
409
{
410
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
411

    
412
    tlb_flush_page(env, watchpoint->vaddr);
413

    
414
    g_free(watchpoint);
415
}
416

    
417
/* Remove all matching watchpoints.  */
418
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
419
{
420
    CPUWatchpoint *wp, *next;
421

    
422
    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
423
        if (wp->flags & mask)
424
            cpu_watchpoint_remove_by_ref(env, wp);
425
    }
426
}
427
#endif
428

    
429
/* Add a breakpoint.  */
430
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
431
                          CPUBreakpoint **breakpoint)
432
{
433
#if defined(TARGET_HAS_ICE)
434
    CPUBreakpoint *bp;
435

    
436
    bp = g_malloc(sizeof(*bp));
437

    
438
    bp->pc = pc;
439
    bp->flags = flags;
440

    
441
    /* keep all GDB-injected breakpoints in front */
442
    if (flags & BP_GDB)
443
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
444
    else
445
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
446

    
447
    breakpoint_invalidate(env, pc);
448

    
449
    if (breakpoint)
450
        *breakpoint = bp;
451
    return 0;
452
#else
453
    return -ENOSYS;
454
#endif
455
}
456

    
457
/* Remove a specific breakpoint.  */
458
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
459
{
460
#if defined(TARGET_HAS_ICE)
461
    CPUBreakpoint *bp;
462

    
463
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
464
        if (bp->pc == pc && bp->flags == flags) {
465
            cpu_breakpoint_remove_by_ref(env, bp);
466
            return 0;
467
        }
468
    }
469
    return -ENOENT;
470
#else
471
    return -ENOSYS;
472
#endif
473
}
474

    
475
/* Remove a specific breakpoint by reference.  */
476
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
477
{
478
#if defined(TARGET_HAS_ICE)
479
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
480

    
481
    breakpoint_invalidate(env, breakpoint->pc);
482

    
483
    g_free(breakpoint);
484
#endif
485
}
486

    
487
/* Remove all matching breakpoints. */
488
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
489
{
490
#if defined(TARGET_HAS_ICE)
491
    CPUBreakpoint *bp, *next;
492

    
493
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
494
        if (bp->flags & mask)
495
            cpu_breakpoint_remove_by_ref(env, bp);
496
    }
497
#endif
498
}
499

    
500
/* enable or disable single step mode. EXCP_DEBUG is returned by the
501
   CPU loop after each instruction */
502
void cpu_single_step(CPUArchState *env, int enabled)
503
{
504
#if defined(TARGET_HAS_ICE)
505
    if (env->singlestep_enabled != enabled) {
506
        env->singlestep_enabled = enabled;
507
        if (kvm_enabled())
508
            kvm_update_guest_debug(env, 0);
509
        else {
510
            /* must flush all the translated code to avoid inconsistencies */
511
            /* XXX: only flush what is necessary */
512
            tb_flush(env);
513
        }
514
    }
515
#endif
516
}
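/* Illustrative sketch (not part of the original file): the on/off pairing a
 * debugger front end uses.  The helper is hypothetical; the real gdbstub
 * passes its SSTEP_* flag set as the "enabled" value, but any non-zero
 * value enables stepping. */
static inline void example_step_once(CPUArchState *env)
{
    cpu_single_step(env, 1);    /* enable: next instruction raises EXCP_DEBUG */
    /* ... resume the CPU and wait for the EXCP_DEBUG exit ... */
    cpu_single_step(env, 0);    /* disable again */
}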
517

    
518
void cpu_exit(CPUArchState *env)
519
{
520
    CPUState *cpu = ENV_GET_CPU(env);
521

    
522
    cpu->exit_request = 1;
523
    cpu->tcg_exit_req = 1;
524
}
525

    
526
void cpu_abort(CPUArchState *env, const char *fmt, ...)
527
{
528
    va_list ap;
529
    va_list ap2;
530

    
531
    va_start(ap, fmt);
532
    va_copy(ap2, ap);
533
    fprintf(stderr, "qemu: fatal: ");
534
    vfprintf(stderr, fmt, ap);
535
    fprintf(stderr, "\n");
536
    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
537
    if (qemu_log_enabled()) {
538
        qemu_log("qemu: fatal: ");
539
        qemu_log_vprintf(fmt, ap2);
540
        qemu_log("\n");
541
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
542
        qemu_log_flush();
543
        qemu_log_close();
544
    }
545
    va_end(ap2);
546
    va_end(ap);
547
#if defined(CONFIG_USER_ONLY)
548
    {
549
        struct sigaction act;
550
        sigfillset(&act.sa_mask);
551
        act.sa_handler = SIG_DFL;
552
        sigaction(SIGABRT, &act, NULL);
553
    }
554
#endif
555
    abort();
556
}
557

    
558
CPUArchState *cpu_copy(CPUArchState *env)
559
{
560
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
561
    CPUArchState *next_cpu = new_env->next_cpu;
562
#if defined(TARGET_HAS_ICE)
563
    CPUBreakpoint *bp;
564
    CPUWatchpoint *wp;
565
#endif
566

    
567
    memcpy(new_env, env, sizeof(CPUArchState));
568

    
569
    /* Preserve chaining. */
570
    new_env->next_cpu = next_cpu;
571

    
572
    /* Clone all break/watchpoints.
573
       Note: Once we support ptrace with hw-debug register access, make sure
574
       BP_CPU break/watchpoints are handled correctly on clone. */
575
    QTAILQ_INIT(&env->breakpoints);
576
    QTAILQ_INIT(&env->watchpoints);
577
#if defined(TARGET_HAS_ICE)
578
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
579
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
580
    }
581
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
582
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
583
                              wp->flags, NULL);
584
    }
585
#endif
586

    
587
    return new_env;
588
}
589

    
590
#if !defined(CONFIG_USER_ONLY)
591
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
592
                                      uintptr_t length)
593
{
594
    uintptr_t start1;
595

    
596
    /* we modify the TLB cache so that the dirty bit will be set again
597
       when accessing the range */
598
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
599
    /* Check that we don't span multiple blocks - this breaks the
600
       address comparisons below.  */
601
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
602
            != (end - 1) - start) {
603
        abort();
604
    }
605
    cpu_tlb_reset_dirty_all(start1, length);
606

    
607
}
608

    
609
/* Note: start and end must be within the same ram block.  */
610
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
611
                                     int dirty_flags)
612
{
613
    uintptr_t length;
614

    
615
    start &= TARGET_PAGE_MASK;
616
    end = TARGET_PAGE_ALIGN(end);
617

    
618
    length = end - start;
619
    if (length == 0)
620
        return;
621
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
622

    
623
    if (tcg_enabled()) {
624
        tlb_reset_dirty_range_all(start, end, length);
625
    }
626
}
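/* Illustrative sketch (not part of the original file): how a dirty-logging
 * client (live migration, a VGA framebuffer, ...) typically consumes the
 * bitmap - scan its flag over a range, then clear the bits it has handled.
 * The helper is hypothetical and "client_flag" stands for whichever dirty
 * flag that client owns. */
static inline void example_sync_dirty(ram_addr_t start, ram_addr_t end,
                                      int client_flag)
{
    ram_addr_t addr;

    for (addr = start & TARGET_PAGE_MASK; addr < end; addr += TARGET_PAGE_SIZE) {
        if (cpu_physical_memory_get_dirty_flags(addr) & client_flag) {
            /* ... copy or transmit this page ... */
        }
    }
    cpu_physical_memory_reset_dirty(start, end, client_flag);
}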
627

    
628
static int cpu_physical_memory_set_dirty_tracking(int enable)
629
{
630
    int ret = 0;
631
    in_migration = enable;
632
    return ret;
633
}
634

    
635
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
636
                                       MemoryRegionSection *section,
637
                                       target_ulong vaddr,
638
                                       hwaddr paddr, hwaddr xlat,
639
                                       int prot,
640
                                       target_ulong *address)
641
{
642
    hwaddr iotlb;
643
    CPUWatchpoint *wp;
644

    
645
    if (memory_region_is_ram(section->mr)) {
646
        /* Normal RAM.  */
647
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
648
            + xlat;
649
        if (!section->readonly) {
650
            iotlb |= phys_section_notdirty;
651
        } else {
652
            iotlb |= phys_section_rom;
653
        }
654
    } else {
655
        iotlb = section - phys_sections;
656
        iotlb += xlat;
657
    }
658

    
659
    /* Make accesses to pages with watchpoints go via the
660
       watchpoint trap routines.  */
661
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
662
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
663
            /* Avoid trapping reads of pages with a write breakpoint. */
664
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
665
                iotlb = phys_section_watch + paddr;
666
                *address |= TLB_MMIO;
667
                break;
668
            }
669
        }
670
    }
671

    
672
    return iotlb;
673
}
674
#endif /* defined(CONFIG_USER_ONLY) */
675

    
676
#if !defined(CONFIG_USER_ONLY)
677

    
678
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
679
typedef struct subpage_t {
680
    MemoryRegion iomem;
681
    hwaddr base;
682
    uint16_t sub_section[TARGET_PAGE_SIZE];
683
} subpage_t;
684

    
685
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
686
                             uint16_t section);
687
static subpage_t *subpage_init(hwaddr base);
688
static void destroy_page_desc(uint16_t section_index)
689
{
690
    MemoryRegionSection *section = &phys_sections[section_index];
691
    MemoryRegion *mr = section->mr;
692

    
693
    if (mr->subpage) {
694
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
695
        memory_region_destroy(&subpage->iomem);
696
        g_free(subpage);
697
    }
698
}
699

    
700
static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
701
{
702
    unsigned i;
703
    PhysPageEntry *p;
704

    
705
    if (lp->ptr == PHYS_MAP_NODE_NIL) {
706
        return;
707
    }
708

    
709
    p = phys_map_nodes[lp->ptr];
710
    for (i = 0; i < L2_SIZE; ++i) {
711
        if (!p[i].is_leaf) {
712
            destroy_l2_mapping(&p[i], level - 1);
713
        } else {
714
            destroy_page_desc(p[i].ptr);
715
        }
716
    }
717
    lp->is_leaf = 0;
718
    lp->ptr = PHYS_MAP_NODE_NIL;
719
}
720

    
721
static void destroy_all_mappings(AddressSpaceDispatch *d)
722
{
723
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
724
    phys_map_nodes_reset();
725
}
726

    
727
static uint16_t phys_section_add(MemoryRegionSection *section)
728
{
729
    /* The physical section number is ORed with a page-aligned
730
     * pointer to produce the iotlb entries.  Thus it should
731
     * never overflow into the page-aligned value.
732
     */
733
    assert(phys_sections_nb < TARGET_PAGE_SIZE);
734

    
735
    if (phys_sections_nb == phys_sections_nb_alloc) {
736
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
737
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
738
                                phys_sections_nb_alloc);
739
    }
740
    phys_sections[phys_sections_nb] = *section;
741
    return phys_sections_nb++;
742
}
743

    
744
static void phys_sections_clear(void)
745
{
746
    phys_sections_nb = 0;
747
}
748

    
749
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
750
{
751
    subpage_t *subpage;
752
    hwaddr base = section->offset_within_address_space
753
        & TARGET_PAGE_MASK;
754
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
755
    MemoryRegionSection subsection = {
756
        .offset_within_address_space = base,
757
        .size = TARGET_PAGE_SIZE,
758
    };
759
    hwaddr start, end;
760

    
761
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
762

    
763
    if (!(existing->mr->subpage)) {
764
        subpage = subpage_init(base);
765
        subsection.mr = &subpage->iomem;
766
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
767
                      phys_section_add(&subsection));
768
    } else {
769
        subpage = container_of(existing->mr, subpage_t, iomem);
770
    }
771
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
772
    end = start + section->size - 1;
773
    subpage_register(subpage, start, end, phys_section_add(section));
774
}
775

    
776

    
777
static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
778
{
779
    hwaddr start_addr = section->offset_within_address_space;
780
    ram_addr_t size = section->size;
781
    hwaddr addr;
782
    uint16_t section_index = phys_section_add(section);
783

    
784
    assert(size);
785

    
786
    addr = start_addr;
787
    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
788
                  section_index);
789
}
790

    
791
QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
792

    
793
static MemoryRegionSection limit(MemoryRegionSection section)
794
{
795
    section.size = MIN(section.offset_within_address_space + section.size,
796
                       MAX_PHYS_ADDR + 1)
797
                   - section.offset_within_address_space;
798

    
799
    return section;
800
}
801

    
802
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
803
{
804
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
805
    MemoryRegionSection now = limit(*section), remain = limit(*section);
806

    
807
    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
808
        || (now.size < TARGET_PAGE_SIZE)) {
809
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
810
                       - now.offset_within_address_space,
811
                       now.size);
812
        register_subpage(d, &now);
813
        remain.size -= now.size;
814
        remain.offset_within_address_space += now.size;
815
        remain.offset_within_region += now.size;
816
    }
817
    while (remain.size >= TARGET_PAGE_SIZE) {
818
        now = remain;
819
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
820
            now.size = TARGET_PAGE_SIZE;
821
            register_subpage(d, &now);
822
        } else {
823
            now.size &= TARGET_PAGE_MASK;
824
            register_multipage(d, &now);
825
        }
826
        remain.size -= now.size;
827
        remain.offset_within_address_space += now.size;
828
        remain.offset_within_region += now.size;
829
    }
830
    now = remain;
831
    if (now.size) {
832
        register_subpage(d, &now);
833
    }
834
}
835

    
836
void qemu_flush_coalesced_mmio_buffer(void)
837
{
838
    if (kvm_enabled())
839
        kvm_flush_coalesced_mmio_buffer();
840
}
841

    
842
void qemu_mutex_lock_ramlist(void)
843
{
844
    qemu_mutex_lock(&ram_list.mutex);
845
}
846

    
847
void qemu_mutex_unlock_ramlist(void)
848
{
849
    qemu_mutex_unlock(&ram_list.mutex);
850
}
851

    
852
#if defined(__linux__) && !defined(TARGET_S390X)
853

    
854
#include <sys/vfs.h>
855

    
856
#define HUGETLBFS_MAGIC       0x958458f6
857

    
858
static long gethugepagesize(const char *path)
859
{
860
    struct statfs fs;
861
    int ret;
862

    
863
    do {
864
        ret = statfs(path, &fs);
865
    } while (ret != 0 && errno == EINTR);
866

    
867
    if (ret != 0) {
868
        perror(path);
869
        return 0;
870
    }
871

    
872
    if (fs.f_type != HUGETLBFS_MAGIC)
873
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
874

    
875
    return fs.f_bsize;
876
}
877

    
878
static void *file_ram_alloc(RAMBlock *block,
879
                            ram_addr_t memory,
880
                            const char *path)
881
{
882
    char *filename;
883
    char *sanitized_name;
884
    char *c;
885
    void *area;
886
    int fd;
887
#ifdef MAP_POPULATE
888
    int flags;
889
#endif
890
    unsigned long hpagesize;
891

    
892
    hpagesize = gethugepagesize(path);
893
    if (!hpagesize) {
894
        return NULL;
895
    }
896

    
897
    if (memory < hpagesize) {
898
        return NULL;
899
    }
900

    
901
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
902
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
903
        return NULL;
904
    }
905

    
906
    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
907
    sanitized_name = g_strdup(block->mr->name);
908
    for (c = sanitized_name; *c != '\0'; c++) {
909
        if (*c == '/')
910
            *c = '_';
911
    }
912

    
913
    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
914
                               sanitized_name);
915
    g_free(sanitized_name);
916

    
917
    fd = mkstemp(filename);
918
    if (fd < 0) {
919
        perror("unable to create backing store for hugepages");
920
        g_free(filename);
921
        return NULL;
922
    }
923
    unlink(filename);
924
    g_free(filename);
925

    
926
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
927

    
928
    /*
929
     * ftruncate is not supported by hugetlbfs in older
930
     * hosts, so don't bother bailing out on errors.
931
     * If anything goes wrong with it under other filesystems,
932
     * mmap will fail.
933
     */
934
    if (ftruncate(fd, memory))
935
        perror("ftruncate");
936

    
937
#ifdef MAP_POPULATE
938
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
939
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
940
     * to sidestep this quirk.
941
     */
942
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
943
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
944
#else
945
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
946
#endif
947
    if (area == MAP_FAILED) {
948
        perror("file_ram_alloc: can't mmap RAM pages");
949
        close(fd);
950
        return (NULL);
951
    }
952
    block->fd = fd;
953
    return area;
954
}
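/* Illustrative sketch (not part of the original file): the rounding used a
 * few lines above, in isolation.  hpagesize is a power of two, so this is a
 * round-up-to-multiple; e.g. a 5 MiB request with 2 MiB huge pages becomes
 * 6 MiB.  The helper name is hypothetical. */
static inline ram_addr_t example_round_to_hugepage(ram_addr_t memory,
                                                   unsigned long hpagesize)
{
    return (memory + hpagesize - 1) & ~((ram_addr_t)hpagesize - 1);
}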
955
#endif
956

    
957
static ram_addr_t find_ram_offset(ram_addr_t size)
958
{
959
    RAMBlock *block, *next_block;
960
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
961

    
962
    assert(size != 0); /* it would hand out same offset multiple times */
963

    
964
    if (QTAILQ_EMPTY(&ram_list.blocks))
965
        return 0;
966

    
967
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
968
        ram_addr_t end, next = RAM_ADDR_MAX;
969

    
970
        end = block->offset + block->length;
971

    
972
        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
973
            if (next_block->offset >= end) {
974
                next = MIN(next, next_block->offset);
975
            }
976
        }
977
        if (next - end >= size && next - end < mingap) {
978
            offset = end;
979
            mingap = next - end;
980
        }
981
    }
982

    
983
    if (offset == RAM_ADDR_MAX) {
984
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
985
                (uint64_t)size);
986
        abort();
987
    }
988

    
989
    return offset;
990
}
991

    
992
ram_addr_t last_ram_offset(void)
993
{
994
    RAMBlock *block;
995
    ram_addr_t last = 0;
996

    
997
    QTAILQ_FOREACH(block, &ram_list.blocks, next)
998
        last = MAX(last, block->offset + block->length);
999

    
1000
    return last;
1001
}
1002

    
1003
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1004
{
1005
    int ret;
1006
    QemuOpts *machine_opts;
1007

    
1008
    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1009
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1010
    if (machine_opts &&
1011
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1012
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1013
        if (ret) {
1014
            perror("qemu_madvise");
1015
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1016
                            "but dump_guest_core=off specified\n");
1017
        }
1018
    }
1019
}
1020

    
1021
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1022
{
1023
    RAMBlock *new_block, *block;
1024

    
1025
    new_block = NULL;
1026
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1027
        if (block->offset == addr) {
1028
            new_block = block;
1029
            break;
1030
        }
1031
    }
1032
    assert(new_block);
1033
    assert(!new_block->idstr[0]);
1034

    
1035
    if (dev) {
1036
        char *id = qdev_get_dev_path(dev);
1037
        if (id) {
1038
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1039
            g_free(id);
1040
        }
1041
    }
1042
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1043

    
1044
    /* This assumes the iothread lock is taken here too.  */
1045
    qemu_mutex_lock_ramlist();
1046
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1047
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1048
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1049
                    new_block->idstr);
1050
            abort();
1051
        }
1052
    }
1053
    qemu_mutex_unlock_ramlist();
1054
}
1055

    
1056
static int memory_try_enable_merging(void *addr, size_t len)
1057
{
1058
    QemuOpts *opts;
1059

    
1060
    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1061
    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1062
        /* disabled by the user */
1063
        return 0;
1064
    }
1065

    
1066
    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1067
}
1068

    
1069
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1070
                                   MemoryRegion *mr)
1071
{
1072
    RAMBlock *block, *new_block;
1073

    
1074
    size = TARGET_PAGE_ALIGN(size);
1075
    new_block = g_malloc0(sizeof(*new_block));
1076

    
1077
    /* This assumes the iothread lock is taken here too.  */
1078
    qemu_mutex_lock_ramlist();
1079
    new_block->mr = mr;
1080
    new_block->offset = find_ram_offset(size);
1081
    if (host) {
1082
        new_block->host = host;
1083
        new_block->flags |= RAM_PREALLOC_MASK;
1084
    } else {
1085
        if (mem_path) {
1086
#if defined (__linux__) && !defined(TARGET_S390X)
1087
            new_block->host = file_ram_alloc(new_block, size, mem_path);
1088
            if (!new_block->host) {
1089
                new_block->host = qemu_anon_ram_alloc(size);
1090
                memory_try_enable_merging(new_block->host, size);
1091
            }
1092
#else
1093
            fprintf(stderr, "-mem-path option unsupported\n");
1094
            exit(1);
1095
#endif
1096
        } else {
1097
            if (xen_enabled()) {
1098
                xen_ram_alloc(new_block->offset, size, mr);
1099
            } else if (kvm_enabled()) {
1100
                /* some s390/kvm configurations have special constraints */
1101
                new_block->host = kvm_ram_alloc(size);
1102
            } else {
1103
                new_block->host = qemu_anon_ram_alloc(size);
1104
            }
1105
            memory_try_enable_merging(new_block->host, size);
1106
        }
1107
    }
1108
    new_block->length = size;
1109

    
1110
    /* Keep the list sorted from biggest to smallest block.  */
1111
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1112
        if (block->length < new_block->length) {
1113
            break;
1114
        }
1115
    }
1116
    if (block) {
1117
        QTAILQ_INSERT_BEFORE(block, new_block, next);
1118
    } else {
1119
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1120
    }
1121
    ram_list.mru_block = NULL;
1122

    
1123
    ram_list.version++;
1124
    qemu_mutex_unlock_ramlist();
1125

    
1126
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1127
                                       last_ram_offset() >> TARGET_PAGE_BITS);
1128
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1129
           0, size >> TARGET_PAGE_BITS);
1130
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1131

    
1132
    qemu_ram_setup_dump(new_block->host, size);
1133
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1134

    
1135
    if (kvm_enabled())
1136
        kvm_setup_guest_memory(new_block->host, size);
1137

    
1138
    return new_block->offset;
1139
}
1140

    
1141
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1142
{
1143
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
1144
}
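/* Illustrative sketch (not part of the original file): allocating a block of
 * guest RAM for a device.  The helper is hypothetical; board and device code
 * normally reaches this path through memory_region_init_ram() rather than
 * calling qemu_ram_alloc() directly. */
static inline ram_addr_t example_alloc_vram(MemoryRegion *mr)
{
    /* 16 MiB of anonymous guest RAM, registered in ram_list.  The return
     * value is a ram_addr_t offset; qemu_get_ram_ptr() turns it into a
     * host pointer when needed. */
    return qemu_ram_alloc(16 * 1024 * 1024, mr);
}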
1145

    
1146
void qemu_ram_free_from_ptr(ram_addr_t addr)
1147
{
1148
    RAMBlock *block;
1149

    
1150
    /* This assumes the iothread lock is taken here too.  */
1151
    qemu_mutex_lock_ramlist();
1152
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1153
        if (addr == block->offset) {
1154
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1155
            ram_list.mru_block = NULL;
1156
            ram_list.version++;
1157
            g_free(block);
1158
            break;
1159
        }
1160
    }
1161
    qemu_mutex_unlock_ramlist();
1162
}
1163

    
1164
void qemu_ram_free(ram_addr_t addr)
1165
{
1166
    RAMBlock *block;
1167

    
1168
    /* This assumes the iothread lock is taken here too.  */
1169
    qemu_mutex_lock_ramlist();
1170
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1171
        if (addr == block->offset) {
1172
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1173
            ram_list.mru_block = NULL;
1174
            ram_list.version++;
1175
            if (block->flags & RAM_PREALLOC_MASK) {
1176
                ;
1177
            } else if (mem_path) {
1178
#if defined (__linux__) && !defined(TARGET_S390X)
1179
                if (block->fd) {
1180
                    munmap(block->host, block->length);
1181
                    close(block->fd);
1182
                } else {
1183
                    qemu_anon_ram_free(block->host, block->length);
1184
                }
1185
#else
1186
                abort();
1187
#endif
1188
            } else {
1189
                if (xen_enabled()) {
1190
                    xen_invalidate_map_cache_entry(block->host);
1191
                } else {
1192
                    qemu_anon_ram_free(block->host, block->length);
1193
                }
1194
            }
1195
            g_free(block);
1196
            break;
1197
        }
1198
    }
1199
    qemu_mutex_unlock_ramlist();
1200

    
1201
}
1202

    
1203
#ifndef _WIN32
1204
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1205
{
1206
    RAMBlock *block;
1207
    ram_addr_t offset;
1208
    int flags;
1209
    void *area, *vaddr;
1210

    
1211
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1212
        offset = addr - block->offset;
1213
        if (offset < block->length) {
1214
            vaddr = block->host + offset;
1215
            if (block->flags & RAM_PREALLOC_MASK) {
1216
                ;
1217
            } else {
1218
                flags = MAP_FIXED;
1219
                munmap(vaddr, length);
1220
                if (mem_path) {
1221
#if defined(__linux__) && !defined(TARGET_S390X)
1222
                    if (block->fd) {
1223
#ifdef MAP_POPULATE
1224
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1225
                            MAP_PRIVATE;
1226
#else
1227
                        flags |= MAP_PRIVATE;
1228
#endif
1229
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1230
                                    flags, block->fd, offset);
1231
                    } else {
1232
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1233
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1234
                                    flags, -1, 0);
1235
                    }
1236
#else
1237
                    abort();
1238
#endif
1239
                } else {
1240
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
1241
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
1242
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1243
                                flags, -1, 0);
1244
#else
1245
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1246
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1247
                                flags, -1, 0);
1248
#endif
1249
                }
1250
                if (area != vaddr) {
1251
                    fprintf(stderr, "Could not remap addr: "
1252
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1253
                            length, addr);
1254
                    exit(1);
1255
                }
1256
                memory_try_enable_merging(vaddr, length);
1257
                qemu_ram_setup_dump(vaddr, length);
1258
            }
1259
            return;
1260
        }
1261
    }
1262
}
1263
#endif /* !_WIN32 */
1264

    
1265
/* Return a host pointer to ram allocated with qemu_ram_alloc.
1266
   With the exception of the softmmu code in this file, this should
1267
   only be used for local memory (e.g. video ram) that the device owns,
1268
   and knows it isn't going to access beyond the end of the block.
1269

1270
   It should not be used for general purpose DMA.
1271
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1272
 */
1273
void *qemu_get_ram_ptr(ram_addr_t addr)
1274
{
1275
    RAMBlock *block;
1276

    
1277
    /* The list is protected by the iothread lock here.  */
1278
    block = ram_list.mru_block;
1279
    if (block && addr - block->offset < block->length) {
1280
        goto found;
1281
    }
1282
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1283
        if (addr - block->offset < block->length) {
1284
            goto found;
1285
        }
1286
    }
1287

    
1288
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1289
    abort();
1290

    
1291
found:
1292
    ram_list.mru_block = block;
1293
    if (xen_enabled()) {
1294
        /* We need to check if the requested address is in the RAM
1295
         * because we don't want to map the entire memory in QEMU.
1296
         * In that case just map until the end of the page.
1297
         */
1298
        if (block->offset == 0) {
1299
            return xen_map_cache(addr, 0, 0);
1300
        } else if (block->host == NULL) {
1301
            block->host =
1302
                xen_map_cache(block->offset, block->length, 1);
1303
        }
1304
    }
1305
    return block->host + (addr - block->offset);
1306
}
1307

    
1308
/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1309
 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1310
 *
1311
 * ??? Is this still necessary?
1312
 */
1313
static void *qemu_safe_ram_ptr(ram_addr_t addr)
1314
{
1315
    RAMBlock *block;
1316

    
1317
    /* The list is protected by the iothread lock here.  */
1318
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1319
        if (addr - block->offset < block->length) {
1320
            if (xen_enabled()) {
1321
                /* We need to check if the requested address is in the RAM
1322
                 * because we don't want to map the entire memory in QEMU.
1323
                 * In that case just map until the end of the page.
1324
                 */
1325
                if (block->offset == 0) {
1326
                    return xen_map_cache(addr, 0, 0);
1327
                } else if (block->host == NULL) {
1328
                    block->host =
1329
                        xen_map_cache(block->offset, block->length, 1);
1330
                }
1331
            }
1332
            return block->host + (addr - block->offset);
1333
        }
1334
    }
1335

    
1336
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1337
    abort();
1338

    
1339
    return NULL;
1340
}
1341

    
1342
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1343
 * but takes a size argument */
1344
static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1345
{
1346
    if (*size == 0) {
1347
        return NULL;
1348
    }
1349
    if (xen_enabled()) {
1350
        return xen_map_cache(addr, *size, 1);
1351
    } else {
1352
        RAMBlock *block;
1353

    
1354
        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1355
            if (addr - block->offset < block->length) {
1356
                if (addr - block->offset + *size > block->length)
1357
                    *size = block->length - addr + block->offset;
1358
                return block->host + (addr - block->offset);
1359
            }
1360
        }
1361

    
1362
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1363
        abort();
1364
    }
1365
}
1366

    
1367
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1368
{
1369
    RAMBlock *block;
1370
    uint8_t *host = ptr;
1371

    
1372
    if (xen_enabled()) {
1373
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1374
        return 0;
1375
    }
1376

    
1377
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378
        /* This case happens when the block is not mapped. */
1379
        if (block->host == NULL) {
1380
            continue;
1381
        }
1382
        if (host - block->host < block->length) {
1383
            *ram_addr = block->offset + (host - block->host);
1384
            return 0;
1385
        }
1386
    }
1387

    
1388
    return -1;
1389
}
1390

    
1391
/* Some of the softmmu routines need to translate from a host pointer
1392
   (typically a TLB entry) back to a ram offset.  */
1393
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1394
{
1395
    ram_addr_t ram_addr;
1396

    
1397
    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1398
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
1399
        abort();
1400
    }
1401
    return ram_addr;
1402
}
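/* Illustrative sketch (not part of the original file): the two lookups are
 * inverses of each other for ordinary RAM (Xen's map cache is the exception
 * handled separately above).  The helper name is hypothetical. */
static inline bool example_ram_addr_roundtrip(ram_addr_t addr)
{
    void *host = qemu_get_ram_ptr(addr);

    return qemu_ram_addr_from_host_nofail(host) == addr;
}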
1403

    
1404
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1405
                               uint64_t val, unsigned size)
1406
{
1407
    int dirty_flags;
1408
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1409
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1410
        tb_invalidate_phys_page_fast(ram_addr, size);
1411
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1412
    }
1413
    switch (size) {
1414
    case 1:
1415
        stb_p(qemu_get_ram_ptr(ram_addr), val);
1416
        break;
1417
    case 2:
1418
        stw_p(qemu_get_ram_ptr(ram_addr), val);
1419
        break;
1420
    case 4:
1421
        stl_p(qemu_get_ram_ptr(ram_addr), val);
1422
        break;
1423
    default:
1424
        abort();
1425
    }
1426
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1427
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1428
    /* we remove the notdirty callback only if the code has been
1429
       flushed */
1430
    if (dirty_flags == 0xff)
1431
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1432
}
1433

    
1434
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1435
                                 unsigned size, bool is_write)
1436
{
1437
    return is_write;
1438
}
1439

    
1440
static const MemoryRegionOps notdirty_mem_ops = {
1441
    .write = notdirty_mem_write,
1442
    .valid.accepts = notdirty_mem_accepts,
1443
    .endianness = DEVICE_NATIVE_ENDIAN,
1444
};
1445

    
1446
/* Generate a debug exception if a watchpoint has been hit.  */
1447
static void check_watchpoint(int offset, int len_mask, int flags)
1448
{
1449
    CPUArchState *env = cpu_single_env;
1450
    target_ulong pc, cs_base;
1451
    target_ulong vaddr;
1452
    CPUWatchpoint *wp;
1453
    int cpu_flags;
1454

    
1455
    if (env->watchpoint_hit) {
1456
        /* We re-entered the check after replacing the TB. Now raise
1457
         * the debug interrupt so that it will trigger after the
1458
         * current instruction. */
1459
        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1460
        return;
1461
    }
1462
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1463
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1464
        if ((vaddr == (wp->vaddr & len_mask) ||
1465
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1466
            wp->flags |= BP_WATCHPOINT_HIT;
1467
            if (!env->watchpoint_hit) {
1468
                env->watchpoint_hit = wp;
1469
                tb_check_watchpoint(env);
1470
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1471
                    env->exception_index = EXCP_DEBUG;
1472
                    cpu_loop_exit(env);
1473
                } else {
1474
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1475
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1476
                    cpu_resume_from_signal(env, NULL);
1477
                }
1478
            }
1479
        } else {
1480
            wp->flags &= ~BP_WATCHPOINT_HIT;
1481
        }
1482
    }
1483
}
1484

    
1485
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1486
   so these check for a hit then pass through to the normal out-of-line
1487
   phys routines.  */
1488
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1489
                               unsigned size)
1490
{
1491
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1492
    switch (size) {
1493
    case 1: return ldub_phys(addr);
1494
    case 2: return lduw_phys(addr);
1495
    case 4: return ldl_phys(addr);
1496
    default: abort();
1497
    }
1498
}
1499

    
1500
static void watch_mem_write(void *opaque, hwaddr addr,
1501
                            uint64_t val, unsigned size)
1502
{
1503
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1504
    switch (size) {
1505
    case 1:
1506
        stb_phys(addr, val);
1507
        break;
1508
    case 2:
1509
        stw_phys(addr, val);
1510
        break;
1511
    case 4:
1512
        stl_phys(addr, val);
1513
        break;
1514
    default: abort();
1515
    }
1516
}
1517

    
1518
static const MemoryRegionOps watch_mem_ops = {
1519
    .read = watch_mem_read,
1520
    .write = watch_mem_write,
1521
    .endianness = DEVICE_NATIVE_ENDIAN,
1522
};
1523

    
1524
static uint64_t subpage_read(void *opaque, hwaddr addr,
1525
                             unsigned len)
1526
{
1527
    subpage_t *mmio = opaque;
1528
    unsigned int idx = SUBPAGE_IDX(addr);
1529
    MemoryRegionSection *section;
1530
#if defined(DEBUG_SUBPAGE)
1531
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1532
           mmio, len, addr, idx);
1533
#endif
1534

    
1535
    section = &phys_sections[mmio->sub_section[idx]];
1536
    addr += mmio->base;
1537
    addr -= section->offset_within_address_space;
1538
    addr += section->offset_within_region;
1539
    return io_mem_read(section->mr, addr, len);
1540
}
1541

    
1542
static void subpage_write(void *opaque, hwaddr addr,
1543
                          uint64_t value, unsigned len)
1544
{
1545
    subpage_t *mmio = opaque;
1546
    unsigned int idx = SUBPAGE_IDX(addr);
1547
    MemoryRegionSection *section;
1548
#if defined(DEBUG_SUBPAGE)
1549
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1550
           " idx %d value %"PRIx64"\n",
1551
           __func__, mmio, len, addr, idx, value);
1552
#endif
1553

    
1554
    section = &phys_sections[mmio->sub_section[idx]];
1555
    addr += mmio->base;
1556
    addr -= section->offset_within_address_space;
1557
    addr += section->offset_within_region;
1558
    io_mem_write(section->mr, addr, value, len);
1559
}
1560

    
1561
static bool subpage_accepts(void *opaque, hwaddr addr,
1562
                            unsigned size, bool is_write)
1563
{
1564
    subpage_t *mmio = opaque;
1565
    unsigned int idx = SUBPAGE_IDX(addr);
1566
    MemoryRegionSection *section;
1567
#if defined(DEBUG_SUBPAGE)
1568
    printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx
1569
           " idx %d\n", __func__, mmio,
1570
           is_write ? 'w' : 'r', size, addr, idx);
1571
#endif
1572

    
1573
    section = &phys_sections[mmio->sub_section[idx]];
1574
    addr += mmio->base;
1575
    addr -= section->offset_within_address_space;
1576
    addr += section->offset_within_region;
1577
    return memory_region_access_valid(section->mr, addr, size, is_write);
1578
}
1579

    
1580
static const MemoryRegionOps subpage_ops = {
1581
    .read = subpage_read,
1582
    .write = subpage_write,
1583
    .valid.accepts = subpage_accepts,
1584
    .endianness = DEVICE_NATIVE_ENDIAN,
1585
};
1586

    
1587
static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1588
                                 unsigned size)
1589
{
1590
    ram_addr_t raddr = addr;
1591
    void *ptr = qemu_get_ram_ptr(raddr);
1592
    switch (size) {
1593
    case 1: return ldub_p(ptr);
1594
    case 2: return lduw_p(ptr);
1595
    case 4: return ldl_p(ptr);
1596
    default: abort();
1597
    }
1598
}
1599

    
1600
static void subpage_ram_write(void *opaque, hwaddr addr,
1601
                              uint64_t value, unsigned size)
1602
{
1603
    ram_addr_t raddr = addr;
1604
    void *ptr = qemu_get_ram_ptr(raddr);
1605
    switch (size) {
1606
    case 1: return stb_p(ptr, value);
1607
    case 2: return stw_p(ptr, value);
1608
    case 4: return stl_p(ptr, value);
1609
    default: abort();
1610
    }
1611
}
1612

    
1613
static const MemoryRegionOps subpage_ram_ops = {
1614
    .read = subpage_ram_read,
1615
    .write = subpage_ram_write,
1616
    .endianness = DEVICE_NATIVE_ENDIAN,
1617
};
1618

    
1619
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1620
                             uint16_t section)
1621
{
1622
    int idx, eidx;
1623

    
1624
    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1625
        return -1;
1626
    idx = SUBPAGE_IDX(start);
1627
    eidx = SUBPAGE_IDX(end);
1628
#if defined(DEBUG_SUBPAGE)
1629
    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1630
           mmio, start, end, idx, eidx, memory);
1631
#endif
1632
    if (memory_region_is_ram(phys_sections[section].mr)) {
1633
        MemoryRegionSection new_section = phys_sections[section];
1634
        new_section.mr = &io_mem_subpage_ram;
1635
        section = phys_section_add(&new_section);
1636
    }
1637
    for (; idx <= eidx; idx++) {
1638
        mmio->sub_section[idx] = section;
1639
    }
1640

    
1641
    return 0;
1642
}
1643

    
1644
static subpage_t *subpage_init(hwaddr base)
1645
{
1646
    subpage_t *mmio;
1647

    
1648
    mmio = g_malloc0(sizeof(subpage_t));
1649

    
1650
    mmio->base = base;
1651
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1652
                          "subpage", TARGET_PAGE_SIZE);
1653
    mmio->iomem.subpage = true;
1654
#if defined(DEBUG_SUBPAGE)
1655
    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1656
           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1657
#endif
1658
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1659

    
1660
    return mmio;
1661
}
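/* Illustrative sketch (not part of the original file): what registering a
 * small MMIO range inside one page looks like.  The helper and its base
 * address are hypothetical; it assumes "section" is a non-RAM section index
 * from phys_section_add(), since RAM sections are redirected to
 * io_mem_subpage_ram by subpage_register(). */
static inline void example_partial_page(uint16_t section)
{
    subpage_t *sp = subpage_init(0x10000000);      /* hypothetical base */

    /* Route byte offsets 0x100..0x13f of the page to "section"; the rest
     * of the page keeps phys_section_unassigned from subpage_init(). */
    subpage_register(sp, 0x100, 0x13f, section);
    assert(sp->sub_section[SUBPAGE_IDX(0x120)] == section);
    assert(sp->sub_section[SUBPAGE_IDX(0x080)] == phys_section_unassigned);
}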
1662

    
1663
static uint16_t dummy_section(MemoryRegion *mr)
1664
{
1665
    MemoryRegionSection section = {
1666
        .mr = mr,
1667
        .offset_within_address_space = 0,
1668
        .offset_within_region = 0,
1669
        .size = UINT64_MAX,
1670
    };
1671

    
1672
    return phys_section_add(&section);
1673
}
1674

    
1675
MemoryRegion *iotlb_to_region(hwaddr index)
1676
{
1677
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1678
}
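/* Illustrative sketch (not part of the original file): why phys_section_add()
 * caps the section count at TARGET_PAGE_SIZE.  A section index OR-ed into the
 * page-offset bits of an iotlb value must come back out intact here.  The
 * helper is hypothetical; "page_aligned" is assumed to have its low
 * TARGET_PAGE_BITS clear. */
static inline bool example_iotlb_roundtrip(uint16_t section_index,
                                           hwaddr page_aligned)
{
    hwaddr iotlb = page_aligned | section_index;

    return iotlb_to_region(iotlb) == phys_sections[section_index].mr;
}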
1679

    
1680
static void io_mem_init(void)
1681
{
1682
    memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1683
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1684
                          "unassigned", UINT64_MAX);
1685
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1686
                          "notdirty", UINT64_MAX);
1687
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1688
                          "subpage-ram", UINT64_MAX);
1689
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1690
                          "watch", UINT64_MAX);
1691
}
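
/* Illustrative sketch: a device model describes its MMIO window the same way
 * the internal regions above are set up.  MyDeviceState, mydev_read,
 * mydev_write and the 0x1000 window size are hypothetical:
 *
 *     static const MemoryRegionOps mydev_ops = {
 *         .read = mydev_read,
 *         .write = mydev_write,
 *         .endianness = DEVICE_NATIVE_ENDIAN,
 *     };
 *
 *     MyDeviceState *s = ...;
 *     memory_region_init_io(&s->iomem, &mydev_ops, s, "mydev-mmio", 0x1000);
 */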

static void mem_begin(MemoryListener *listener)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);

    destroy_all_mappings(d);
    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
}

static void core_begin(MemoryListener *listener)
{
    phys_sections_clear();
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
    phys_section_rom = dummy_section(&io_mem_rom);
    phys_section_watch = dummy_section(&io_mem_watch);
}

static void tcg_commit(MemoryListener *listener)
{
    CPUArchState *env;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener io_memory_listener = {
    .region_add = io_region_add,
    .region_del = io_region_del,
    .priority = 0,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

void address_space_init_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
    d->listener = (MemoryListener) {
        .begin = mem_begin,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    as->dispatch = d;
    memory_listener_register(&d->listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&d->listener);
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);

    dma_context_init(&dma_context_memory, &address_space_memory,
                     NULL, NULL, NULL);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}
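
/* Illustrative sketch: board code maps its regions into the address spaces
 * created above.  The "ram" region and MYDEV_MMIO_BASE offset are
 * hypothetical:
 *
 *     memory_region_add_subregion(get_system_memory(), 0, ram);
 *     memory_region_add_subregion(get_system_memory(), MYDEV_MMIO_BASE,
 *                                 &s->iomem);
 */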

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

static inline int memory_access_size(int l, hwaddr addr)
{
    if (l >= 4 && ((addr & 3) == 0)) {
        return 4;
    }
    if (l >= 2 && ((addr & 1) == 0)) {
        return 2;
    }
    return 1;
}
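
/* Worked example: for an 8-byte request to a non-RAM (I/O) region at address
 * 0x1002, memory_access_size(8, 0x1002) returns 2 because the address is
 * 2-byte but not 4-byte aligned.  The loop in address_space_rw() below then
 * issues a 2-byte access, advances to 0x1004 with 6 bytes left (a 4-byte
 * access), and finishes with a 2-byte access at 0x1008.
 */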

void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint32_t val;
    hwaddr addr1;
    MemoryRegionSection *section;

    while (len > 0) {
        l = len;
        section = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(section->mr, is_write)) {
                l = memory_access_size(l, addr1);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l == 4) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                } else if (l == 2) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                }
            } else {
                addr1 += memory_region_get_ram_addr(section->mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(section->mr, is_write)) {
                /* I/O case */
                l = memory_access_size(l, addr1);
                if (l == 4) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                } else if (l == 2) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

void address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

/**
 * address_space_read: read from an address space.
 *
 * @as: #AddressSpace to be accessed
 * @addr: address within that address space
 * @buf: buffer with the data transferred
 * @len: length of the data transferred
 */
void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    address_space_rw(as, addr, buf, len, false);
}
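
/* Illustrative sketch: callers copy data to or from guest-physical memory
 * through an address space; addr and the buffer contents are hypothetical:
 *
 *     uint8_t buf[4];
 *     address_space_read(&address_space_memory, addr, buf, sizeof(buf));
 *     address_space_write(&address_space_memory, addr, buf, sizeof(buf));
 */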

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    return address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

/* used for ROM loading: can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegionSection *section;

    while (len > 0) {
        l = len;
        section = address_space_translate(&address_space_memory,
                                          addr, &addr1, &l, true);

        if (!(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(section->mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

typedef struct {
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}
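
/* Illustrative sketch: a caller whose address_space_map() attempt failed
 * (for example because the single bounce buffer was busy) can ask to be
 * notified when a retry is likely to succeed.  retry_dma(), the
 * mydev_restart_transfer() helper and the opaque pointer s are hypothetical:
 *
 *     static void retry_dma(void *opaque)
 *     {
 *         MyDeviceState *s = opaque;
 *         mydev_restart_transfer(s);
 *     }
 *
 *     cpu_register_map_client(s, retry_dma);
 */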

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegionSection *section;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        section = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(section->mr, is_write)) {
            l = memory_access_size(l, addr);
            if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr todo = 0;
    hwaddr l, xlat;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        l = len;
        section = address_space_translate(as, addr, &xlat, &l, is_write);

        if (!memory_access_is_direct(section->mr, is_write)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                address_space_read(as, addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(section->mr) + xlat;
        } else {
            if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
                break;
            }
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}
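
/* Illustrative sketch of the map/unmap pairing for a DMA-style read of guest
 * memory.  desc_addr, size and consume() are hypothetical; note that only
 * maplen bytes (possibly fewer than requested) are guaranteed to be mapped:
 *
 *     hwaddr maplen = size;
 *     void *p = cpu_physical_memory_map(desc_addr, &maplen, 0);
 *     if (p) {
 *         consume(p, maplen);
 *         cpu_physical_memory_unmap(p, maplen, 0, maplen);
 *     }
 */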

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;
    hwaddr l = 4;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      false);
    if (l < 4 || !memory_access_is_direct(section->mr, false)) {
        /* I/O case */
        val = io_mem_read(section->mr, addr1, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;
    hwaddr l = 8;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      false);
    if (l < 8 || !memory_access_is_direct(section->mr, false)) {
        /* I/O case */

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr1, 4) << 32;
        val |= io_mem_read(section->mr, addr1 + 4, 4);
#else
        val = io_mem_read(section->mr, addr1, 4);
        val |= io_mem_read(section->mr, addr1 + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;
    hwaddr l = 2;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      false);
    if (l < 2 || !memory_access_is_direct(section->mr, false)) {
        /* I/O case */
        val = io_mem_read(section->mr, addr1, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
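
/* Illustrative sketch: device and board code uses these helpers to read
 * guest-physical memory with a fixed width and endianness, e.g. fields of a
 * little-endian descriptor at a hypothetical address desc_pa:
 *
 *     uint32_t flags = ldl_le_phys(desc_pa);
 *     uint32_t count = lduw_le_phys(desc_pa + 4);
 */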

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;
    hwaddr l = 4;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      true);
    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
        io_mem_write(section->mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;
    hwaddr l = 4;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      true);
    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;
    hwaddr l = 2;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      true);
    if (l < 2 || !memory_access_is_direct(section->mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
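
/* Illustrative sketch: the store side mirrors the loads above, e.g. writing
 * back a 16-bit status word and a 64-bit big-endian field of a descriptor
 * at a hypothetical address desc_pa:
 *
 *     stw_le_phys(desc_pa + 6, status);
 *     stq_be_phys(desc_pa + 8, timestamp);
 */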

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif
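
/* Illustrative sketch: the debug path above works on guest virtual
 * addresses, as used by the gdbstub.  vaddr is hypothetical; a negative
 * return value means some page in the range was unmapped:
 *
 *     uint8_t insn[16];
 *     int ret = cpu_memory_rw_debug(env, vaddr, insn, sizeof(insn), 0);
 */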

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegionSection *section;
    hwaddr l = 1;

    section = address_space_translate(&address_space_memory,
                                      phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(section->mr) ||
             memory_region_is_romd(section->mr));
}
#endif
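
/* Illustrative note: cpu_physical_memory_is_io() above answers whether a
 * guest-physical address is backed by MMIO rather than RAM or ROM, e.g. for
 * a hypothetical address paddr:
 *
 *     bool mmio = cpu_physical_memory_is_io(paddr);
 */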