/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"

#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;
DMAContext dma_context_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;

#endif

CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

static MemoryRegionSection *phys_sections;
static unsigned phys_sections_nb, phys_sections_nb_alloc;
static uint16_t phys_section_unassigned;
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

static void io_mem_init(void);
static void memory_map_init(void);
static void *qemu_safe_ram_ptr(ram_addr_t addr);

static MemoryRegion io_mem_watch;
#endif
103

    
104
#if !defined(CONFIG_USER_ONLY)
105

    
106
static void phys_map_node_reserve(unsigned nodes)
107
{
108
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
109
        typedef PhysPageEntry Node[L2_SIZE];
110
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
111
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
112
                                      phys_map_nodes_nb + nodes);
113
        phys_map_nodes = g_renew(Node, phys_map_nodes,
114
                                 phys_map_nodes_nb_alloc);
115
    }
116
}
117

    
118
static uint16_t phys_map_node_alloc(void)
119
{
120
    unsigned i;
121
    uint16_t ret;
122

    
123
    ret = phys_map_nodes_nb++;
124
    assert(ret != PHYS_MAP_NODE_NIL);
125
    assert(ret != phys_map_nodes_nb_alloc);
126
    for (i = 0; i < L2_SIZE; ++i) {
127
        phys_map_nodes[ret][i].is_leaf = 0;
128
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
129
    }
130
    return ret;
131
}
132

    
133
static void phys_map_nodes_reset(void)
134
{
135
    phys_map_nodes_nb = 0;
136
}
137

    
138

    
139
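/* Populate one level of the physical page map.  The node is allocated on
 * first touch (level-0 entries are pre-filled with the unassigned section);
 * whole step-aligned, step-sized chunks become leaves pointing at 'leaf',
 * and partially covered entries are handled by recursing one level down.
 */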
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
140
                                hwaddr *nb, uint16_t leaf,
141
                                int level)
142
{
143
    PhysPageEntry *p;
144
    int i;
145
    hwaddr step = (hwaddr)1 << (level * L2_BITS);
146

    
147
    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
148
        lp->ptr = phys_map_node_alloc();
149
        p = phys_map_nodes[lp->ptr];
150
        if (level == 0) {
151
            for (i = 0; i < L2_SIZE; i++) {
152
                p[i].is_leaf = 1;
153
                p[i].ptr = phys_section_unassigned;
154
            }
155
        }
156
    } else {
157
        p = phys_map_nodes[lp->ptr];
158
    }
159
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
160

    
161
    while (*nb && lp < &p[L2_SIZE]) {
162
        if ((*index & (step - 1)) == 0 && *nb >= step) {
163
            lp->is_leaf = true;
164
            lp->ptr = leaf;
165
            *index += step;
166
            *nb -= step;
167
        } else {
168
            phys_page_set_level(lp, index, nb, leaf, level - 1);
169
        }
170
        ++lp;
171
    }
172
}
173

    
174
static void phys_page_set(AddressSpaceDispatch *d,
175
                          hwaddr index, hwaddr nb,
176
                          uint16_t leaf)
177
{
178
    /* Wildly overreserve - it doesn't matter much. */
179
    phys_map_node_reserve(3 * P_L2_LEVELS);
180

    
181
    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
182
}
183

    
184
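/* Walk the multi-level map from the root down to a leaf for 'index'
 * (a physical page number).  A NIL pointer anywhere on the way means the
 * page was never mapped, so the unassigned section is returned.
 */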
static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
185
{
186
    PhysPageEntry lp = d->phys_map;
187
    PhysPageEntry *p;
188
    int i;
189

    
190
    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
191
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
192
            return &phys_sections[phys_section_unassigned];
193
        }
194
        p = phys_map_nodes[lp.ptr];
195
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
196
    }
197
    return &phys_sections[lp.ptr];
198
}
199

    
200
bool memory_region_is_unassigned(MemoryRegion *mr)
201
{
202
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
203
        && mr != &io_mem_watch;
204
}
205

    
206
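/* Translate an address-space offset into a MemoryRegionSection plus an
 * offset (*xlat) within its MemoryRegion.  *plen is clamped so that the
 * returned range does not run past the end of the region; 'is_write' is
 * not used by this implementation.
 */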
MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
207
                                             hwaddr *xlat, hwaddr *plen,
208
                                             bool is_write)
209
{
210
    MemoryRegionSection *section;
211
    Int128 diff;
212

    
213
    section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
214
    /* Compute offset within MemoryRegionSection */
215
    addr -= section->offset_within_address_space;
216

    
217
    /* Compute offset within MemoryRegion */
218
    *xlat = addr + section->offset_within_region;
219

    
220
    diff = int128_sub(section->mr->size, int128_make64(addr));
221
    *plen = MIN(int128_get64(diff), *plen);
222
    return section;
223
}
224
#endif
225

    
226
void cpu_exec_init_all(void)
227
{
228
#if !defined(CONFIG_USER_ONLY)
229
    qemu_mutex_init(&ram_list.mutex);
230
    memory_map_init();
231
    io_mem_init();
232
#endif
233
}
234

    
235
#if !defined(CONFIG_USER_ONLY)
236

    
237
static int cpu_common_post_load(void *opaque, int version_id)
238
{
239
    CPUState *cpu = opaque;
240

    
241
    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
242
       version_id is increased. */
243
    cpu->interrupt_request &= ~0x01;
244
    tlb_flush(cpu->env_ptr, 1);
245

    
246
    return 0;
247
}
248

    
249
static const VMStateDescription vmstate_cpu_common = {
250
    .name = "cpu_common",
251
    .version_id = 1,
252
    .minimum_version_id = 1,
253
    .minimum_version_id_old = 1,
254
    .post_load = cpu_common_post_load,
255
    .fields      = (VMStateField []) {
256
        VMSTATE_UINT32(halted, CPUState),
257
        VMSTATE_UINT32(interrupt_request, CPUState),
258
        VMSTATE_END_OF_LIST()
259
    }
260
};
261
#else
262
#define vmstate_cpu_common vmstate_dummy
263
#endif
264

    
265
CPUState *qemu_get_cpu(int index)
266
{
267
    CPUArchState *env = first_cpu;
268
    CPUState *cpu = NULL;
269

    
270
    while (env) {
271
        cpu = ENV_GET_CPU(env);
272
        if (cpu->cpu_index == index) {
273
            break;
274
        }
275
        env = env->next_cpu;
276
    }
277

    
278
    return env ? cpu : NULL;
279
}
280

    
281
void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
282
{
283
    CPUArchState *env = first_cpu;
284

    
285
    while (env) {
286
        func(ENV_GET_CPU(env), data);
287
        env = env->next_cpu;
288
    }
289
}
290

    
291
void cpu_exec_init(CPUArchState *env)
292
{
293
    CPUState *cpu = ENV_GET_CPU(env);
294
    CPUClass *cc = CPU_GET_CLASS(cpu);
295
    CPUArchState **penv;
296
    int cpu_index;
297

    
298
#if defined(CONFIG_USER_ONLY)
299
    cpu_list_lock();
300
#endif
301
    env->next_cpu = NULL;
302
    penv = &first_cpu;
303
    cpu_index = 0;
304
    while (*penv != NULL) {
305
        penv = &(*penv)->next_cpu;
306
        cpu_index++;
307
    }
308
    cpu->cpu_index = cpu_index;
309
    cpu->numa_node = 0;
310
    QTAILQ_INIT(&env->breakpoints);
311
    QTAILQ_INIT(&env->watchpoints);
312
#ifndef CONFIG_USER_ONLY
313
    cpu->thread_id = qemu_get_thread_id();
314
#endif
315
    *penv = env;
316
#if defined(CONFIG_USER_ONLY)
317
    cpu_list_unlock();
318
#endif
319
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
320
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
321
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
322
                    cpu_save, cpu_load, env);
323
    assert(cc->vmsd == NULL);
324
#endif
325
    if (cc->vmsd != NULL) {
326
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
327
    }
328
}
329

    
330
#if defined(TARGET_HAS_ICE)
331
#if defined(CONFIG_USER_ONLY)
332
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
333
{
334
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
335
}
336
#else
337
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
338
{
339
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
340
            (pc & ~TARGET_PAGE_MASK));
341
}
342
#endif
343
#endif /* TARGET_HAS_ICE */
344

    
345
#if defined(CONFIG_USER_ONLY)
346
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
347

    
348
{
349
}
350

    
351
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
352
                          int flags, CPUWatchpoint **watchpoint)
353
{
354
    return -ENOSYS;
355
}
356
#else
357
/* Add a watchpoint.  */
358
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
359
                          int flags, CPUWatchpoint **watchpoint)
360
{
361
    target_ulong len_mask = ~(len - 1);
362
    CPUWatchpoint *wp;
363

    
364
    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
365
    if ((len & (len - 1)) || (addr & ~len_mask) ||
366
            len == 0 || len > TARGET_PAGE_SIZE) {
367
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
368
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
369
        return -EINVAL;
370
    }
371
    wp = g_malloc(sizeof(*wp));
372

    
373
    wp->vaddr = addr;
374
    wp->len_mask = len_mask;
375
    wp->flags = flags;
376

    
377
    /* keep all GDB-injected watchpoints in front */
378
    if (flags & BP_GDB)
379
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
380
    else
381
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
382

    
383
    tlb_flush_page(env, addr);
384

    
385
    if (watchpoint)
386
        *watchpoint = wp;
387
    return 0;
388
}
389

    
390
/* Remove a specific watchpoint.  */
391
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
392
                          int flags)
393
{
394
    target_ulong len_mask = ~(len - 1);
395
    CPUWatchpoint *wp;
396

    
397
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
398
        if (addr == wp->vaddr && len_mask == wp->len_mask
399
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
400
            cpu_watchpoint_remove_by_ref(env, wp);
401
            return 0;
402
        }
403
    }
404
    return -ENOENT;
405
}
406

    
407
/* Remove a specific watchpoint by reference.  */
408
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
409
{
410
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
411

    
412
    tlb_flush_page(env, watchpoint->vaddr);
413

    
414
    g_free(watchpoint);
415
}
416

    
417
/* Remove all matching watchpoints.  */
418
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
419
{
420
    CPUWatchpoint *wp, *next;
421

    
422
    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
423
        if (wp->flags & mask)
424
            cpu_watchpoint_remove_by_ref(env, wp);
425
    }
426
}
427
#endif
428

    
429
/* Add a breakpoint.  */
430
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
431
                          CPUBreakpoint **breakpoint)
432
{
433
#if defined(TARGET_HAS_ICE)
434
    CPUBreakpoint *bp;
435

    
436
    bp = g_malloc(sizeof(*bp));
437

    
438
    bp->pc = pc;
439
    bp->flags = flags;
440

    
441
    /* keep all GDB-injected breakpoints in front */
442
    if (flags & BP_GDB)
443
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
444
    else
445
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
446

    
447
    breakpoint_invalidate(env, pc);
448

    
449
    if (breakpoint)
450
        *breakpoint = bp;
451
    return 0;
452
#else
453
    return -ENOSYS;
454
#endif
455
}
456

    
457
/* Remove a specific breakpoint.  */
458
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
459
{
460
#if defined(TARGET_HAS_ICE)
461
    CPUBreakpoint *bp;
462

    
463
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
464
        if (bp->pc == pc && bp->flags == flags) {
465
            cpu_breakpoint_remove_by_ref(env, bp);
466
            return 0;
467
        }
468
    }
469
    return -ENOENT;
470
#else
471
    return -ENOSYS;
472
#endif
473
}
474

    
475
/* Remove a specific breakpoint by reference.  */
476
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
477
{
478
#if defined(TARGET_HAS_ICE)
479
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
480

    
481
    breakpoint_invalidate(env, breakpoint->pc);
482

    
483
    g_free(breakpoint);
484
#endif
485
}
486

    
487
/* Remove all matching breakpoints. */
488
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
489
{
490
#if defined(TARGET_HAS_ICE)
491
    CPUBreakpoint *bp, *next;
492

    
493
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
494
        if (bp->flags & mask)
495
            cpu_breakpoint_remove_by_ref(env, bp);
496
    }
497
#endif
498
}
499

    
500
/* enable or disable single step mode. EXCP_DEBUG is returned by the
501
   CPU loop after each instruction */
502
void cpu_single_step(CPUArchState *env, int enabled)
503
{
504
#if defined(TARGET_HAS_ICE)
505
    if (env->singlestep_enabled != enabled) {
506
        env->singlestep_enabled = enabled;
507
        if (kvm_enabled())
508
            kvm_update_guest_debug(env, 0);
509
        else {
510
            /* must flush all the translated code to avoid inconsistencies */
511
            /* XXX: only flush what is necessary */
512
            tb_flush(env);
513
        }
514
    }
515
#endif
516
}
517

    
518
void cpu_exit(CPUArchState *env)
519
{
520
    CPUState *cpu = ENV_GET_CPU(env);
521

    
522
    cpu->exit_request = 1;
523
    cpu->tcg_exit_req = 1;
524
}
525

    
526
void cpu_abort(CPUArchState *env, const char *fmt, ...)
527
{
528
    va_list ap;
529
    va_list ap2;
530

    
531
    va_start(ap, fmt);
532
    va_copy(ap2, ap);
533
    fprintf(stderr, "qemu: fatal: ");
534
    vfprintf(stderr, fmt, ap);
535
    fprintf(stderr, "\n");
536
    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
537
    if (qemu_log_enabled()) {
538
        qemu_log("qemu: fatal: ");
539
        qemu_log_vprintf(fmt, ap2);
540
        qemu_log("\n");
541
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
542
        qemu_log_flush();
543
        qemu_log_close();
544
    }
545
    va_end(ap2);
546
    va_end(ap);
547
#if defined(CONFIG_USER_ONLY)
548
    {
549
        struct sigaction act;
550
        sigfillset(&act.sa_mask);
551
        act.sa_handler = SIG_DFL;
552
        sigaction(SIGABRT, &act, NULL);
553
    }
554
#endif
555
    abort();
556
}
557

    
558
CPUArchState *cpu_copy(CPUArchState *env)
559
{
560
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
561
    CPUArchState *next_cpu = new_env->next_cpu;
562
#if defined(TARGET_HAS_ICE)
563
    CPUBreakpoint *bp;
564
    CPUWatchpoint *wp;
565
#endif
566

    
567
    memcpy(new_env, env, sizeof(CPUArchState));
568

    
569
    /* Preserve chaining. */
570
    new_env->next_cpu = next_cpu;
571

    
572
    /* Clone all break/watchpoints.
573
       Note: Once we support ptrace with hw-debug register access, make sure
574
       BP_CPU break/watchpoints are handled correctly on clone. */
575
    QTAILQ_INIT(&env->breakpoints);
576
    QTAILQ_INIT(&env->watchpoints);
577
#if defined(TARGET_HAS_ICE)
578
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
579
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
580
    }
581
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
582
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
583
                              wp->flags, NULL);
584
    }
585
#endif
586

    
587
    return new_env;
588
}
589

    
590
#if !defined(CONFIG_USER_ONLY)
591
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
592
                                      uintptr_t length)
593
{
594
    uintptr_t start1;
595

    
596
    /* we modify the TLB cache so that the dirty bit will be set again
597
       when accessing the range */
598
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
599
    /* Check that we don't span multiple blocks - this breaks the
600
       address comparisons below.  */
601
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
602
            != (end - 1) - start) {
603
        abort();
604
    }
605
    cpu_tlb_reset_dirty_all(start1, length);
606

    
607
}
608

    
609
/* Note: start and end must be within the same ram block.  */
610
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
611
                                     int dirty_flags)
612
{
613
    uintptr_t length;
614

    
615
    start &= TARGET_PAGE_MASK;
616
    end = TARGET_PAGE_ALIGN(end);
617

    
618
    length = end - start;
619
    if (length == 0)
620
        return;
621
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
622

    
623
    if (tcg_enabled()) {
624
        tlb_reset_dirty_range_all(start, end, length);
625
    }
626
}
627

    
628
static int cpu_physical_memory_set_dirty_tracking(int enable)
629
{
630
    int ret = 0;
631
    in_migration = enable;
632
    return ret;
633
}
634

    
635
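/* Build the iotlb entry for a page: for RAM the page's ram_addr is
 * combined with the notdirty or rom section index, for MMIO the section
 * index itself (plus the offset) is used.  Pages covered by a watchpoint
 * are redirected to the watch section and flagged TLB_MMIO so every
 * access takes the slow path.
 */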
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
636
                                       MemoryRegionSection *section,
637
                                       target_ulong vaddr,
638
                                       hwaddr paddr, hwaddr xlat,
639
                                       int prot,
640
                                       target_ulong *address)
641
{
642
    hwaddr iotlb;
643
    CPUWatchpoint *wp;
644

    
645
    if (memory_region_is_ram(section->mr)) {
646
        /* Normal RAM.  */
647
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
648
            + xlat;
649
        if (!section->readonly) {
650
            iotlb |= phys_section_notdirty;
651
        } else {
652
            iotlb |= phys_section_rom;
653
        }
654
    } else {
655
        iotlb = section - phys_sections;
656
        iotlb += xlat;
657
    }
658

    
659
    /* Make accesses to pages with watchpoints go via the
660
       watchpoint trap routines.  */
661
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
662
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
663
            /* Avoid trapping reads of pages with a write breakpoint. */
664
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
665
                iotlb = phys_section_watch + paddr;
666
                *address |= TLB_MMIO;
667
                break;
668
            }
669
        }
670
    }
671

    
672
    return iotlb;
673
}
674
#endif /* !defined(CONFIG_USER_ONLY) */
675

    
676
#if !defined(CONFIG_USER_ONLY)
677

    
678
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
679
typedef struct subpage_t {
680
    MemoryRegion iomem;
681
    hwaddr base;
682
    uint16_t sub_section[TARGET_PAGE_SIZE];
683
} subpage_t;
684

    
685
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
686
                             uint16_t section);
687
static subpage_t *subpage_init(hwaddr base);
688
static void destroy_page_desc(uint16_t section_index)
689
{
690
    MemoryRegionSection *section = &phys_sections[section_index];
691
    MemoryRegion *mr = section->mr;
692

    
693
    if (mr->subpage) {
694
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
695
        memory_region_destroy(&subpage->iomem);
696
        g_free(subpage);
697
    }
698
}
699

    
700
static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
701
{
702
    unsigned i;
703
    PhysPageEntry *p;
704

    
705
    if (lp->ptr == PHYS_MAP_NODE_NIL) {
706
        return;
707
    }
708

    
709
    p = phys_map_nodes[lp->ptr];
710
    for (i = 0; i < L2_SIZE; ++i) {
711
        if (!p[i].is_leaf) {
712
            destroy_l2_mapping(&p[i], level - 1);
713
        } else {
714
            destroy_page_desc(p[i].ptr);
715
        }
716
    }
717
    lp->is_leaf = 0;
718
    lp->ptr = PHYS_MAP_NODE_NIL;
719
}
720

    
721
static void destroy_all_mappings(AddressSpaceDispatch *d)
722
{
723
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
724
    phys_map_nodes_reset();
725
}
726

    
727
static uint16_t phys_section_add(MemoryRegionSection *section)
728
{
729
    /* The physical section number is ORed with a page-aligned
730
     * pointer to produce the iotlb entries.  Thus it should
731
     * never overflow into the page-aligned value.
732
     */
733
    assert(phys_sections_nb < TARGET_PAGE_SIZE);
734

    
735
    if (phys_sections_nb == phys_sections_nb_alloc) {
736
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
737
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
738
                                phys_sections_nb_alloc);
739
    }
740
    phys_sections[phys_sections_nb] = *section;
741
    return phys_sections_nb++;
742
}
743

    
744
static void phys_sections_clear(void)
745
{
746
    phys_sections_nb = 0;
747
}
748

    
749
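/* Register a section that does not cover a whole target page.  The page
 * gets a subpage_t container (created on first use, reused afterwards)
 * and the section index is recorded for the sub-page range it covers.
 */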
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
750
{
751
    subpage_t *subpage;
752
    hwaddr base = section->offset_within_address_space
753
        & TARGET_PAGE_MASK;
754
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
755
    MemoryRegionSection subsection = {
756
        .offset_within_address_space = base,
757
        .size = TARGET_PAGE_SIZE,
758
    };
759
    hwaddr start, end;
760

    
761
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
762

    
763
    if (!(existing->mr->subpage)) {
764
        subpage = subpage_init(base);
765
        subsection.mr = &subpage->iomem;
766
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
767
                      phys_section_add(&subsection));
768
    } else {
769
        subpage = container_of(existing->mr, subpage_t, iomem);
770
    }
771
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
772
    end = start + section->size - 1;
773
    subpage_register(subpage, start, end, phys_section_add(section));
774
}
775

    
776

    
777
static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
778
{
779
    hwaddr start_addr = section->offset_within_address_space;
780
    ram_addr_t size = section->size;
781
    hwaddr addr;
782
    uint16_t section_index = phys_section_add(section);
783

    
784
    assert(size);
785

    
786
    addr = start_addr;
787
    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
788
                  section_index);
789
}
790

    
791
QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
792

    
793
static MemoryRegionSection limit(MemoryRegionSection section)
794
{
795
    section.size = MIN(section.offset_within_address_space + section.size,
796
                       MAX_PHYS_ADDR + 1)
797
                   - section.offset_within_address_space;
798

    
799
    return section;
800
}
801

    
802
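/* Memory listener 'region_add' hook: split the incoming section into an
 * unaligned head, a middle and an unaligned tail.  Head and tail become
 * subpages; the middle is registered as full pages, or page by page as
 * subpages when its offset within the region is not page aligned.
 */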
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
803
{
804
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
805
    MemoryRegionSection now = limit(*section), remain = limit(*section);
806

    
807
    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
808
        || (now.size < TARGET_PAGE_SIZE)) {
809
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
810
                       - now.offset_within_address_space,
811
                       now.size);
812
        register_subpage(d, &now);
813
        remain.size -= now.size;
814
        remain.offset_within_address_space += now.size;
815
        remain.offset_within_region += now.size;
816
    }
817
    while (remain.size >= TARGET_PAGE_SIZE) {
818
        now = remain;
819
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
820
            now.size = TARGET_PAGE_SIZE;
821
            register_subpage(d, &now);
822
        } else {
823
            now.size &= TARGET_PAGE_MASK;
824
            register_multipage(d, &now);
825
        }
826
        remain.size -= now.size;
827
        remain.offset_within_address_space += now.size;
828
        remain.offset_within_region += now.size;
829
    }
830
    now = remain;
831
    if (now.size) {
832
        register_subpage(d, &now);
833
    }
834
}
835

    
836
void qemu_flush_coalesced_mmio_buffer(void)
837
{
838
    if (kvm_enabled())
839
        kvm_flush_coalesced_mmio_buffer();
840
}
841

    
842
void qemu_mutex_lock_ramlist(void)
843
{
844
    qemu_mutex_lock(&ram_list.mutex);
845
}
846

    
847
void qemu_mutex_unlock_ramlist(void)
848
{
849
    qemu_mutex_unlock(&ram_list.mutex);
850
}
851

    
852
#if defined(__linux__) && !defined(TARGET_S390X)
853

    
854
#include <sys/vfs.h>
855

    
856
#define HUGETLBFS_MAGIC       0x958458f6
857

    
858
static long gethugepagesize(const char *path)
859
{
860
    struct statfs fs;
861
    int ret;
862

    
863
    do {
864
        ret = statfs(path, &fs);
865
    } while (ret != 0 && errno == EINTR);
866

    
867
    if (ret != 0) {
868
        perror(path);
869
        return 0;
870
    }
871

    
872
    if (fs.f_type != HUGETLBFS_MAGIC)
873
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
874

    
875
    return fs.f_bsize;
876
}
877

    
878
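/* Back a RAMBlock with a file created on the -mem-path filesystem
 * (normally hugetlbfs): create and unlink a temporary file, size it with
 * ftruncate and mmap it.  Returns NULL on failure so the caller can fall
 * back to anonymous memory.
 */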
static void *file_ram_alloc(RAMBlock *block,
879
                            ram_addr_t memory,
880
                            const char *path)
881
{
882
    char *filename;
883
    char *sanitized_name;
884
    char *c;
885
    void *area;
886
    int fd;
887
#ifdef MAP_POPULATE
888
    int flags;
889
#endif
890
    unsigned long hpagesize;
891

    
892
    hpagesize = gethugepagesize(path);
893
    if (!hpagesize) {
894
        return NULL;
895
    }
896

    
897
    if (memory < hpagesize) {
898
        return NULL;
899
    }
900

    
901
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
902
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
903
        return NULL;
904
    }
905

    
906
    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
907
    sanitized_name = g_strdup(block->mr->name);
908
    for (c = sanitized_name; *c != '\0'; c++) {
909
        if (*c == '/')
910
            *c = '_';
911
    }
912

    
913
    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
914
                               sanitized_name);
915
    g_free(sanitized_name);
916

    
917
    fd = mkstemp(filename);
918
    if (fd < 0) {
919
        perror("unable to create backing store for hugepages");
920
        g_free(filename);
921
        return NULL;
922
    }
923
    unlink(filename);
924
    g_free(filename);
925

    
926
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
927

    
928
    /*
929
     * ftruncate is not supported by hugetlbfs in older
930
     * hosts, so don't bother bailing out on errors.
931
     * If anything goes wrong with it under other filesystems,
932
     * mmap will fail.
933
     */
934
    if (ftruncate(fd, memory))
935
        perror("ftruncate");
936

    
937
#ifdef MAP_POPULATE
938
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
939
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
940
     * to sidestep this quirk.
941
     */
942
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
943
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
944
#else
945
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
946
#endif
947
    if (area == MAP_FAILED) {
948
        perror("file_ram_alloc: can't mmap RAM pages");
949
        close(fd);
950
        return (NULL);
951
    }
952
    block->fd = fd;
953
    return area;
954
}
955
#endif
956

    
957
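/* Best-fit search for a free range of 'size' bytes in ram_addr_t space:
 * for every block, look at the gap up to the next-higher block and keep
 * the smallest gap that still fits.  Aborts if no gap is large enough.
 */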
static ram_addr_t find_ram_offset(ram_addr_t size)
958
{
959
    RAMBlock *block, *next_block;
960
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
961

    
962
    assert(size != 0); /* it would hand out same offset multiple times */
963

    
964
    if (QTAILQ_EMPTY(&ram_list.blocks))
965
        return 0;
966

    
967
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
968
        ram_addr_t end, next = RAM_ADDR_MAX;
969

    
970
        end = block->offset + block->length;
971

    
972
        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
973
            if (next_block->offset >= end) {
974
                next = MIN(next, next_block->offset);
975
            }
976
        }
977
        if (next - end >= size && next - end < mingap) {
978
            offset = end;
979
            mingap = next - end;
980
        }
981
    }
982

    
983
    if (offset == RAM_ADDR_MAX) {
984
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
985
                (uint64_t)size);
986
        abort();
987
    }
988

    
989
    return offset;
990
}
991

    
992
ram_addr_t last_ram_offset(void)
993
{
994
    RAMBlock *block;
995
    ram_addr_t last = 0;
996

    
997
    QTAILQ_FOREACH(block, &ram_list.blocks, next)
998
        last = MAX(last, block->offset + block->length);
999

    
1000
    return last;
1001
}
1002

    
1003
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1004
{
1005
    int ret;
1006
    QemuOpts *machine_opts;
1007

    
1008
    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1009
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1010
    if (machine_opts &&
1011
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1012
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1013
        if (ret) {
1014
            perror("qemu_madvise");
1015
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1016
                            "but dump_guest_core=off specified\n");
1017
        }
1018
    }
1019
}
1020

    
1021
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1022
{
1023
    RAMBlock *new_block, *block;
1024

    
1025
    new_block = NULL;
1026
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1027
        if (block->offset == addr) {
1028
            new_block = block;
1029
            break;
1030
        }
1031
    }
1032
    assert(new_block);
1033
    assert(!new_block->idstr[0]);
1034

    
1035
    if (dev) {
1036
        char *id = qdev_get_dev_path(dev);
1037
        if (id) {
1038
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1039
            g_free(id);
1040
        }
1041
    }
1042
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1043

    
1044
    /* This assumes the iothread lock is taken here too.  */
1045
    qemu_mutex_lock_ramlist();
1046
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1047
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1048
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1049
                    new_block->idstr);
1050
            abort();
1051
        }
1052
    }
1053
    qemu_mutex_unlock_ramlist();
1054
}
1055

    
1056
static int memory_try_enable_merging(void *addr, size_t len)
1057
{
1058
    QemuOpts *opts;
1059

    
1060
    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1061
    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1062
        /* disabled by the user */
1063
        return 0;
1064
    }
1065

    
1066
    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1067
}
1068

    
1069
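/* Create a new RAMBlock of 'size' bytes for 'mr'.  The backing memory is
 * the caller-supplied 'host' pointer if given, otherwise -mem-path, Xen,
 * KVM or plain anonymous memory as appropriate.  The block is inserted
 * into ram_list (sorted by decreasing size) and its pages are marked
 * dirty in the freshly grown dirty bitmap.
 */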
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1070
                                   MemoryRegion *mr)
1071
{
1072
    RAMBlock *block, *new_block;
1073

    
1074
    size = TARGET_PAGE_ALIGN(size);
1075
    new_block = g_malloc0(sizeof(*new_block));
1076

    
1077
    /* This assumes the iothread lock is taken here too.  */
1078
    qemu_mutex_lock_ramlist();
1079
    new_block->mr = mr;
1080
    new_block->offset = find_ram_offset(size);
1081
    if (host) {
1082
        new_block->host = host;
1083
        new_block->flags |= RAM_PREALLOC_MASK;
1084
    } else {
1085
        if (mem_path) {
1086
#if defined (__linux__) && !defined(TARGET_S390X)
1087
            new_block->host = file_ram_alloc(new_block, size, mem_path);
1088
            if (!new_block->host) {
1089
                new_block->host = qemu_anon_ram_alloc(size);
1090
                memory_try_enable_merging(new_block->host, size);
1091
            }
1092
#else
1093
            fprintf(stderr, "-mem-path option unsupported\n");
1094
            exit(1);
1095
#endif
1096
        } else {
1097
            if (xen_enabled()) {
1098
                xen_ram_alloc(new_block->offset, size, mr);
1099
            } else if (kvm_enabled()) {
1100
                /* some s390/kvm configurations have special constraints */
1101
                new_block->host = kvm_ram_alloc(size);
1102
            } else {
1103
                new_block->host = qemu_anon_ram_alloc(size);
1104
            }
1105
            memory_try_enable_merging(new_block->host, size);
1106
        }
1107
    }
1108
    new_block->length = size;
1109

    
1110
    /* Keep the list sorted from biggest to smallest block.  */
1111
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1112
        if (block->length < new_block->length) {
1113
            break;
1114
        }
1115
    }
1116
    if (block) {
1117
        QTAILQ_INSERT_BEFORE(block, new_block, next);
1118
    } else {
1119
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1120
    }
1121
    ram_list.mru_block = NULL;
1122

    
1123
    ram_list.version++;
1124
    qemu_mutex_unlock_ramlist();
1125

    
1126
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1127
                                       last_ram_offset() >> TARGET_PAGE_BITS);
1128
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1129
           0, size >> TARGET_PAGE_BITS);
1130
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1131

    
1132
    qemu_ram_setup_dump(new_block->host, size);
1133
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1134

    
1135
    if (kvm_enabled())
1136
        kvm_setup_guest_memory(new_block->host, size);
1137

    
1138
    return new_block->offset;
1139
}
1140

    
1141
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1142
{
1143
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
1144
}
1145

    
1146
void qemu_ram_free_from_ptr(ram_addr_t addr)
1147
{
1148
    RAMBlock *block;
1149

    
1150
    /* This assumes the iothread lock is taken here too.  */
1151
    qemu_mutex_lock_ramlist();
1152
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1153
        if (addr == block->offset) {
1154
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1155
            ram_list.mru_block = NULL;
1156
            ram_list.version++;
1157
            g_free(block);
1158
            break;
1159
        }
1160
    }
1161
    qemu_mutex_unlock_ramlist();
1162
}
1163

    
1164
void qemu_ram_free(ram_addr_t addr)
1165
{
1166
    RAMBlock *block;
1167

    
1168
    /* This assumes the iothread lock is taken here too.  */
1169
    qemu_mutex_lock_ramlist();
1170
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1171
        if (addr == block->offset) {
1172
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1173
            ram_list.mru_block = NULL;
1174
            ram_list.version++;
1175
            if (block->flags & RAM_PREALLOC_MASK) {
1176
                ;
1177
            } else if (mem_path) {
1178
#if defined (__linux__) && !defined(TARGET_S390X)
1179
                if (block->fd) {
1180
                    munmap(block->host, block->length);
1181
                    close(block->fd);
1182
                } else {
1183
                    qemu_anon_ram_free(block->host, block->length);
1184
                }
1185
#else
1186
                abort();
1187
#endif
1188
            } else {
1189
                if (xen_enabled()) {
1190
                    xen_invalidate_map_cache_entry(block->host);
1191
                } else {
1192
                    qemu_anon_ram_free(block->host, block->length);
1193
                }
1194
            }
1195
            g_free(block);
1196
            break;
1197
        }
1198
    }
1199
    qemu_mutex_unlock_ramlist();
1200

    
1201
}
1202

    
1203
#ifndef _WIN32
1204
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1205
{
1206
    RAMBlock *block;
1207
    ram_addr_t offset;
1208
    int flags;
1209
    void *area, *vaddr;
1210

    
1211
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1212
        offset = addr - block->offset;
1213
        if (offset < block->length) {
1214
            vaddr = block->host + offset;
1215
            if (block->flags & RAM_PREALLOC_MASK) {
1216
                ;
1217
            } else {
1218
                flags = MAP_FIXED;
1219
                munmap(vaddr, length);
1220
                if (mem_path) {
1221
#if defined(__linux__) && !defined(TARGET_S390X)
1222
                    if (block->fd) {
1223
#ifdef MAP_POPULATE
1224
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1225
                            MAP_PRIVATE;
1226
#else
1227
                        flags |= MAP_PRIVATE;
1228
#endif
1229
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1230
                                    flags, block->fd, offset);
1231
                    } else {
1232
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1233
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1234
                                    flags, -1, 0);
1235
                    }
1236
#else
1237
                    abort();
1238
#endif
1239
                } else {
1240
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
1241
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
1242
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1243
                                flags, -1, 0);
1244
#else
1245
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1246
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1247
                                flags, -1, 0);
1248
#endif
1249
                }
1250
                if (area != vaddr) {
1251
                    fprintf(stderr, "Could not remap addr: "
1252
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1253
                            length, addr);
1254
                    exit(1);
1255
                }
1256
                memory_try_enable_merging(vaddr, length);
1257
                qemu_ram_setup_dump(vaddr, length);
1258
            }
1259
            return;
1260
        }
1261
    }
1262
}
1263
#endif /* !_WIN32 */
1264

    
1265
/* Return a host pointer to ram allocated with qemu_ram_alloc.
1266
   With the exception of the softmmu code in this file, this should
1267
   only be used for local memory (e.g. video ram) that the device owns,
1268
   and knows it isn't going to access beyond the end of the block.
1269

1270
   It should not be used for general purpose DMA.
1271
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1272
 */
1273
void *qemu_get_ram_ptr(ram_addr_t addr)
1274
{
1275
    RAMBlock *block;
1276

    
1277
    /* The list is protected by the iothread lock here.  */
1278
    block = ram_list.mru_block;
1279
    if (block && addr - block->offset < block->length) {
1280
        goto found;
1281
    }
1282
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1283
        if (addr - block->offset < block->length) {
1284
            goto found;
1285
        }
1286
    }
1287

    
1288
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1289
    abort();
1290

    
1291
found:
1292
    ram_list.mru_block = block;
1293
    if (xen_enabled()) {
1294
        /* We need to check if the requested address is in the RAM
1295
         * because we don't want to map the entire memory in QEMU.
1296
         * In that case just map until the end of the page.
1297
         */
1298
        if (block->offset == 0) {
1299
            return xen_map_cache(addr, 0, 0);
1300
        } else if (block->host == NULL) {
1301
            block->host =
1302
                xen_map_cache(block->offset, block->length, 1);
1303
        }
1304
    }
1305
    return block->host + (addr - block->offset);
1306
}
1307

    
1308
/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1309
 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1310
 *
1311
 * ??? Is this still necessary?
1312
 */
1313
static void *qemu_safe_ram_ptr(ram_addr_t addr)
1314
{
1315
    RAMBlock *block;
1316

    
1317
    /* The list is protected by the iothread lock here.  */
1318
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1319
        if (addr - block->offset < block->length) {
1320
            if (xen_enabled()) {
1321
                /* We need to check if the requested address is in the RAM
1322
                 * because we don't want to map the entire memory in QEMU.
1323
                 * In that case just map until the end of the page.
1324
                 */
1325
                if (block->offset == 0) {
1326
                    return xen_map_cache(addr, 0, 0);
1327
                } else if (block->host == NULL) {
1328
                    block->host =
1329
                        xen_map_cache(block->offset, block->length, 1);
1330
                }
1331
            }
1332
            return block->host + (addr - block->offset);
1333
        }
1334
    }
1335

    
1336
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1337
    abort();
1338

    
1339
    return NULL;
1340
}
1341

    
1342
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1343
 * but takes a size argument */
1344
static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1345
{
1346
    if (*size == 0) {
1347
        return NULL;
1348
    }
1349
    if (xen_enabled()) {
1350
        return xen_map_cache(addr, *size, 1);
1351
    } else {
1352
        RAMBlock *block;
1353

    
1354
        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1355
            if (addr - block->offset < block->length) {
1356
                if (addr - block->offset + *size > block->length)
1357
                    *size = block->length - addr + block->offset;
1358
                return block->host + (addr - block->offset);
1359
            }
1360
        }
1361

    
1362
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1363
        abort();
1364
    }
1365
}
1366

    
1367
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1368
{
1369
    RAMBlock *block;
1370
    uint8_t *host = ptr;
1371

    
1372
    if (xen_enabled()) {
1373
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1374
        return 0;
1375
    }
1376

    
1377
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378
        /* This case happens when the block is not mapped. */
1379
        if (block->host == NULL) {
1380
            continue;
1381
        }
1382
        if (host - block->host < block->length) {
1383
            *ram_addr = block->offset + (host - block->host);
1384
            return 0;
1385
        }
1386
    }
1387

    
1388
    return -1;
1389
}
1390

    
1391
/* Some of the softmmu routines need to translate from a host pointer
1392
   (typically a TLB entry) back to a ram offset.  */
1393
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1394
{
1395
    ram_addr_t ram_addr;
1396

    
1397
    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1398
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
1399
        abort();
1400
    }
1401
    return ram_addr;
1402
}
1403

    
1404
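/* Write handler installed for RAM pages whose code-dirty flag is clear:
 * invalidate any TBs translated from the page, perform the write, set
 * the dirty flags and, once the page is fully dirty again, let the TLB
 * go back to writing the RAM directly.
 */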
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1405
                               uint64_t val, unsigned size)
1406
{
1407
    int dirty_flags;
1408
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1409
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1410
        tb_invalidate_phys_page_fast(ram_addr, size);
1411
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1412
    }
1413
    switch (size) {
1414
    case 1:
1415
        stb_p(qemu_get_ram_ptr(ram_addr), val);
1416
        break;
1417
    case 2:
1418
        stw_p(qemu_get_ram_ptr(ram_addr), val);
1419
        break;
1420
    case 4:
1421
        stl_p(qemu_get_ram_ptr(ram_addr), val);
1422
        break;
1423
    default:
1424
        abort();
1425
    }
1426
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1427
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1428
    /* we remove the notdirty callback only if the code has been
1429
       flushed */
1430
    if (dirty_flags == 0xff)
1431
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1432
}
1433

    
1434
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1435
                                 unsigned size, bool is_write)
1436
{
1437
    return is_write;
1438
}
1439

    
1440
static const MemoryRegionOps notdirty_mem_ops = {
1441
    .write = notdirty_mem_write,
1442
    .valid.accepts = notdirty_mem_accepts,
1443
    .endianness = DEVICE_NATIVE_ENDIAN,
1444
};
1445

    
1446
/* Generate a debug exception if a watchpoint has been hit.  */
1447
static void check_watchpoint(int offset, int len_mask, int flags)
1448
{
1449
    CPUArchState *env = cpu_single_env;
1450
    target_ulong pc, cs_base;
1451
    target_ulong vaddr;
1452
    CPUWatchpoint *wp;
1453
    int cpu_flags;
1454

    
1455
    if (env->watchpoint_hit) {
1456
        /* We re-entered the check after replacing the TB. Now raise
1457
         * the debug interrupt so that is will trigger after the
1458
         * current instruction. */
1459
        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1460
        return;
1461
    }
1462
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1463
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1464
        if ((vaddr == (wp->vaddr & len_mask) ||
1465
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1466
            wp->flags |= BP_WATCHPOINT_HIT;
1467
            if (!env->watchpoint_hit) {
1468
                env->watchpoint_hit = wp;
1469
                tb_check_watchpoint(env);
1470
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1471
                    env->exception_index = EXCP_DEBUG;
1472
                    cpu_loop_exit(env);
1473
                } else {
1474
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1475
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1476
                    cpu_resume_from_signal(env, NULL);
1477
                }
1478
            }
1479
        } else {
1480
            wp->flags &= ~BP_WATCHPOINT_HIT;
1481
        }
1482
    }
1483
}
1484

    
1485
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1486
   so these check for a hit then pass through to the normal out-of-line
1487
   phys routines.  */
1488
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1489
                               unsigned size)
1490
{
1491
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1492
    switch (size) {
1493
    case 1: return ldub_phys(addr);
1494
    case 2: return lduw_phys(addr);
1495
    case 4: return ldl_phys(addr);
1496
    default: abort();
1497
    }
1498
}
1499

    
1500
static void watch_mem_write(void *opaque, hwaddr addr,
1501
                            uint64_t val, unsigned size)
1502
{
1503
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1504
    switch (size) {
1505
    case 1:
1506
        stb_phys(addr, val);
1507
        break;
1508
    case 2:
1509
        stw_phys(addr, val);
1510
        break;
1511
    case 4:
1512
        stl_phys(addr, val);
1513
        break;
1514
    default: abort();
1515
    }
1516
}
1517

    
1518
static const MemoryRegionOps watch_mem_ops = {
1519
    .read = watch_mem_read,
1520
    .write = watch_mem_write,
1521
    .endianness = DEVICE_NATIVE_ENDIAN,
1522
};
1523

    
1524
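/* Sub-page accessors: look up the MemoryRegionSection registered for
 * this offset within the page and forward the access to it, after
 * rebasing the address into that section's MemoryRegion.
 */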
static uint64_t subpage_read(void *opaque, hwaddr addr,
1525
                             unsigned len)
1526
{
1527
    subpage_t *mmio = opaque;
1528
    unsigned int idx = SUBPAGE_IDX(addr);
1529
    uint64_t val;
1530

    
1531
    MemoryRegionSection *section;
1532
#if defined(DEBUG_SUBPAGE)
1533
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1534
           mmio, len, addr, idx);
1535
#endif
1536

    
1537
    section = &phys_sections[mmio->sub_section[idx]];
1538
    addr += mmio->base;
1539
    addr -= section->offset_within_address_space;
1540
    addr += section->offset_within_region;
1541
    io_mem_read(section->mr, addr, &val, len);
1542
    return val;
1543
}
1544

    
1545
static void subpage_write(void *opaque, hwaddr addr,
1546
                          uint64_t value, unsigned len)
1547
{
1548
    subpage_t *mmio = opaque;
1549
    unsigned int idx = SUBPAGE_IDX(addr);
1550
    MemoryRegionSection *section;
1551
#if defined(DEBUG_SUBPAGE)
1552
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1553
           " idx %d value %"PRIx64"\n",
1554
           __func__, mmio, len, addr, idx, value);
1555
#endif
1556

    
1557
    section = &phys_sections[mmio->sub_section[idx]];
1558
    addr += mmio->base;
1559
    addr -= section->offset_within_address_space;
1560
    addr += section->offset_within_region;
1561
    io_mem_write(section->mr, addr, value, len);
1562
}
1563

    
1564
static bool subpage_accepts(void *opaque, hwaddr addr,
1565
                            unsigned size, bool is_write)
1566
{
1567
    subpage_t *mmio = opaque;
1568
    unsigned int idx = SUBPAGE_IDX(addr);
1569
    MemoryRegionSection *section;
1570
#if defined(DEBUG_SUBPAGE)
1571
    printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx
           " idx %d\n", __func__, mmio,
           is_write ? 'w' : 'r', size, addr, idx);
1574
#endif
1575

    
1576
    section = &phys_sections[mmio->sub_section[idx]];
1577
    addr += mmio->base;
1578
    addr -= section->offset_within_address_space;
1579
    addr += section->offset_within_region;
1580
    return memory_region_access_valid(section->mr, addr, size, is_write);
1581
}
1582

    
1583
static const MemoryRegionOps subpage_ops = {
1584
    .read = subpage_read,
1585
    .write = subpage_write,
1586
    .valid.accepts = subpage_accepts,
1587
    .endianness = DEVICE_NATIVE_ENDIAN,
1588
};
1589

    
1590
static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1591
                                 unsigned size)
1592
{
1593
    ram_addr_t raddr = addr;
1594
    void *ptr = qemu_get_ram_ptr(raddr);
1595
    switch (size) {
1596
    case 1: return ldub_p(ptr);
1597
    case 2: return lduw_p(ptr);
1598
    case 4: return ldl_p(ptr);
1599
    default: abort();
1600
    }
1601
}
1602

    
1603
static void subpage_ram_write(void *opaque, hwaddr addr,
1604
                              uint64_t value, unsigned size)
1605
{
1606
    ram_addr_t raddr = addr;
1607
    void *ptr = qemu_get_ram_ptr(raddr);
1608
    switch (size) {
1609
    case 1: return stb_p(ptr, value);
1610
    case 2: return stw_p(ptr, value);
1611
    case 4: return stl_p(ptr, value);
1612
    default: abort();
1613
    }
1614
}
1615

    
1616
static const MemoryRegionOps subpage_ram_ops = {
1617
    .read = subpage_ram_read,
1618
    .write = subpage_ram_write,
1619
    .endianness = DEVICE_NATIVE_ENDIAN,
1620
};
1621

    
1622
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1623
                             uint16_t section)
1624
{
1625
    int idx, eidx;
1626

    
1627
    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1628
        return -1;
1629
    idx = SUBPAGE_IDX(start);
1630
    eidx = SUBPAGE_IDX(end);
1631
#if defined(DEBUG_SUBPAGE)
1632
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
1634
#endif
1635
    if (memory_region_is_ram(phys_sections[section].mr)) {
1636
        MemoryRegionSection new_section = phys_sections[section];
1637
        new_section.mr = &io_mem_subpage_ram;
1638
        section = phys_section_add(&new_section);
1639
    }
1640
    for (; idx <= eidx; idx++) {
1641
        mmio->sub_section[idx] = section;
1642
    }
1643

    
1644
    return 0;
1645
}
1646

    
1647
static subpage_t *subpage_init(hwaddr base)
1648
{
1649
    subpage_t *mmio;
1650

    
1651
    mmio = g_malloc0(sizeof(subpage_t));
1652

    
1653
    mmio->base = base;
1654
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1655
                          "subpage", TARGET_PAGE_SIZE);
1656
    mmio->iomem.subpage = true;
1657
#if defined(DEBUG_SUBPAGE)
1658
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
1660
#endif
1661
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1662

    
1663
    return mmio;
1664
}

static uint16_t dummy_section(MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = UINT64_MAX,
    };

    return phys_section_add(&section);
}

MemoryRegion *iotlb_to_region(hwaddr index)
{
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
                          "subpage-ram", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);

    destroy_all_mappings(d);
    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
}

static void core_begin(MemoryListener *listener)
{
    phys_sections_clear();
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
    phys_section_rom = dummy_section(&io_mem_rom);
    phys_section_watch = dummy_section(&io_mem_watch);
}

static void tcg_commit(MemoryListener *listener)
{
    CPUArchState *env;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener io_memory_listener = {
    .region_add = io_region_add,
    .region_del = io_region_del,
    .priority = 0,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

void address_space_init_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
    d->listener = (MemoryListener) {
        .begin = mem_begin,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    as->dispatch = d;
    memory_listener_register(&d->listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&d->listener);
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);

    dma_context_init(&dma_context_memory, &address_space_memory,
                     NULL, NULL, NULL);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

static inline int memory_access_size(int l, hwaddr addr)
{
    if (l >= 4 && ((addr & 3) == 0)) {
        return 4;
    }
    if (l >= 2 && ((addr & 1) == 0)) {
        return 2;
    }
    return 1;
}
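
#ifdef EXEC_USAGE_EXAMPLES
/* Worked illustration (hypothetical, guarded out of the build): how an
 * I/O transfer gets split by memory_access_size().  For example, a
 * 7 byte access starting at an odd address is issued as 1 + 2 + 4 byte
 * accesses, each naturally aligned. */
static void example_split_io_access(hwaddr addr, int len)
{
    while (len > 0) {
        int l = memory_access_size(len, addr);   /* 1, 2 or 4 */
        /* ... an io_mem_read()/io_mem_write() of 'l' bytes would go here ... */
        addr += l;
        len -= l;
    }
}
#endif /* EXEC_USAGE_EXAMPLES */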

void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegionSection *section;

    while (len > 0) {
        l = len;
        section = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(section->mr, is_write)) {
                l = memory_access_size(l, addr1);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l == 4) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                } else if (l == 2) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                }
            } else {
                addr1 += memory_region_get_ram_addr(section->mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(section->mr, is_write)) {
                /* I/O case */
                l = memory_access_size(l, addr1);
                if (l == 4) {
                    /* 32 bit read access */
                    io_mem_read(section->mr, addr1, &val, 4);
                    stl_p(buf, val);
                } else if (l == 2) {
                    /* 16 bit read access */
                    io_mem_read(section->mr, addr1, &val, 2);
                    stw_p(buf, val);
                } else {
                    /* 8 bit read access */
                    io_mem_read(section->mr, addr1, &val, 1);
                    stb_p(buf, val);
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

void address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

/**
 * address_space_read: read from an address space.
 *
 * @as: #AddressSpace to be accessed
 * @addr: address within that address space
 * @buf: buffer with the data transferred
 * @len: length of the data transferred
 */
void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    address_space_rw(as, addr, buf, len, false);
}
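
#ifdef EXEC_USAGE_EXAMPLES
/* Minimal usage sketch (hypothetical, guarded out of the build): copy a
 * small buffer into guest memory through the system memory address
 * space and read it back.  example_address_space_copy() exists only to
 * illustrate the API. */
static void example_address_space_copy(hwaddr guest_addr)
{
    uint8_t out[16] = { 0 };
    uint8_t in[16];

    address_space_write(&address_space_memory, guest_addr, out, sizeof(out));
    address_space_read(&address_space_memory, guest_addr, in, sizeof(in));
}
#endif /* EXEC_USAGE_EXAMPLES */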

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    return address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegionSection *section;

    while (len > 0) {
        l = len;
        section = address_space_translate(&address_space_memory,
                                          addr, &addr1, &l, true);

        if (!(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(section->mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
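
#ifdef EXEC_USAGE_EXAMPLES
/* Hypothetical firmware-loading sketch: unlike cpu_physical_memory_rw(),
 * cpu_physical_memory_write_rom() also patches ROM/ROMD regions, which
 * is what board code needs when it installs a boot image.
 * example_install_boot_blob() is illustrative only. */
static void example_install_boot_blob(hwaddr rom_base,
                                      const uint8_t *blob, int blob_len)
{
    cpu_physical_memory_write_rom(rom_base, blob, blob_len);
}
#endif /* EXEC_USAGE_EXAMPLES */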

typedef struct {
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegionSection *section;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        section = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(section->mr, is_write)) {
            l = memory_access_size(l, addr);
            if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr todo = 0;
    hwaddr l, xlat;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        l = len;
        section = address_space_translate(as, addr, &xlat, &l, is_write);

        if (!memory_access_is_direct(section->mr, is_write)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                address_space_read(as, addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(section->mr) + xlat;
        } else {
            if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
                break;
            }
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}
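
#ifdef EXEC_USAGE_EXAMPLES
/* Minimal sketch of the zero-copy DMA pattern built on
 * cpu_physical_memory_map()/unmap().  EXEC_USAGE_EXAMPLES and the
 * example_* functions are hypothetical and only illustrate how a
 * device model might consume these helpers: when the single bounce
 * buffer is busy the map can fail, and cpu_register_map_client()
 * arranges a callback so the caller can retry later. */
static void example_dma_retry(void *opaque)
{
    /* a real device would re-issue its deferred DMA here */
}

static void example_dma_read_from_guest(hwaddr guest_addr, hwaddr size)
{
    hwaddr plen = size;
    void *host = cpu_physical_memory_map(guest_addr, &plen, 0 /* read */);

    if (!host) {
        /* resources exhausted: ask to be notified and try again later */
        cpu_register_map_client(NULL, example_dma_retry);
        return;
    }

    /* ... the device would consume up to 'plen' bytes at 'host' here ... */

    cpu_physical_memory_unmap(host, plen, 0 /* read */, plen);
}
#endif /* EXEC_USAGE_EXAMPLES */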

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;
    hwaddr l = 4;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      false);
    if (l < 4 || !memory_access_is_direct(section->mr, false)) {
        /* I/O case */
        io_mem_read(section->mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;
    hwaddr l = 8;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      false);
    if (l < 8 || !memory_access_is_direct(section->mr, false)) {
        /* I/O case */
        io_mem_read(section->mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;
    hwaddr l = 2;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      false);
    if (l < 2 || !memory_access_is_direct(section->mr, false)) {
        /* I/O case */
        io_mem_read(section->mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;
    hwaddr l = 4;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      true);
    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
        io_mem_write(section->mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;
    hwaddr l = 4;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      true);
    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
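
#ifdef EXEC_USAGE_EXAMPLES
/* Hypothetical sketch: a device model updating a 32 bit little-endian
 * field in guest memory.  ldl_le_phys()/stl_le_phys() hide both host
 * and target byte order, so the same code works on any build.
 * example_bump_le32_counter() is illustrative only. */
static uint32_t example_bump_le32_counter(hwaddr field_addr)
{
    uint32_t val = ldl_le_phys(field_addr);

    stl_le_phys(field_addr, val + 1);
    return val;
}
#endif /* EXEC_USAGE_EXAMPLES */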

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;
    hwaddr l = 2;
    hwaddr addr1;

    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                      true);
    if (l < 2 || !memory_access_is_direct(section->mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
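
#ifdef EXEC_USAGE_EXAMPLES
/* Hypothetical gdbstub-style helper: peek at a guest virtual address
 * through the CPU's current page tables.  Returns 0 on success and -1
 * if the page is unmapped.  example_peek_guest_u32() is illustrative
 * only and not used anywhere in exec.c. */
static int example_peek_guest_u32(CPUArchState *env, target_ulong vaddr,
                                  uint32_t *out)
{
    return cpu_memory_rw_debug(env, vaddr, (uint8_t *)out, 4, 0);
}
#endif /* EXEC_USAGE_EXAMPLES */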
#endif

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegionSection *section;
    hwaddr l = 1;

    section = address_space_translate(&address_space_memory,
                                      phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(section->mr) ||
             memory_region_is_romd(section->mr));
}
#endif