/*
2
 *  Virtual page mapping
3
 *
4
 *  Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include "config.h"
20
#ifdef _WIN32
21
#include <windows.h>
22
#else
23
#include <sys/types.h>
24
#include <sys/mman.h>
25
#endif
26

    
27
#include "qemu-common.h"
28
#include "cpu.h"
29
#include "tcg.h"
30
#include "hw/hw.h"
31
#include "hw/qdev.h"
32
#include "qemu/osdep.h"
33
#include "sysemu/kvm.h"
34
#include "hw/xen/xen.h"
35
#include "qemu/timer.h"
36
#include "qemu/config-file.h"
37
#include "exec/memory.h"
38
#include "sysemu/dma.h"
39
#include "exec/address-spaces.h"
40
#if defined(CONFIG_USER_ONLY)
41
#include <qemu.h>
42
#else /* !CONFIG_USER_ONLY */
43
#include "sysemu/xen-mapcache.h"
44
#include "trace.h"
45
#endif
46
#include "exec/cpu-all.h"
47

    
48
#include "exec/cputlb.h"
49
#include "translate-all.h"
50

    
51
#include "exec/memory-internal.h"
52

    
53
//#define DEBUG_SUBPAGE
54

    
55
#if !defined(CONFIG_USER_ONLY)
56
int phys_ram_fd;
57
static int in_migration;
58

    
59
RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60

    
61
static MemoryRegion *system_memory;
62
static MemoryRegion *system_io;
63

    
64
AddressSpace address_space_io;
65
AddressSpace address_space_memory;
66
DMAContext dma_context_memory;
67

    
68
MemoryRegion io_mem_rom, io_mem_notdirty;
69
static MemoryRegion io_mem_unassigned;
70

    
71
#endif
72

    
73
CPUArchState *first_cpu;
74
/* current CPU in the current thread. It is only valid inside
75
   cpu_exec() */
76
DEFINE_TLS(CPUArchState *,cpu_single_env);
77
/* 0 = Do not count executed instructions.
78
   1 = Precise instruction counting.
79
   2 = Adaptive rate instruction counting.  */
80
int use_icount;
81

    
82
#if !defined(CONFIG_USER_ONLY)
83

    
84
typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    uint16_t is_leaf : 1;
    /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};

struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    MemoryListener listener;
    AddressSpace *as;
};
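
/*
 * The dispatch map is a small radix tree: a non-leaf PhysPageEntry points at
 * a phys_map_nodes[] node of L2_SIZE children, while a leaf holds an index
 * into phys_sections[].  A lookup walks P_L2_LEVELS levels, consuming L2_BITS
 * of the page frame number per level; this is essentially what
 * phys_page_find() does below (omitting the PHYS_MAP_NODE_NIL check):
 *
 *     lp = d->phys_map;
 *     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
 *         lp = phys_map_nodes[lp.ptr][(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
 *     }
 */
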
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
102
typedef struct subpage_t {
103
    MemoryRegion iomem;
104
    AddressSpace *as;
105
    hwaddr base;
106
    uint16_t sub_section[TARGET_PAGE_SIZE];
107
} subpage_t;
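
/*
 * A subpage_t backs a target page that is covered by more than one
 * MemoryRegionSection, or by a section that does not start or end on a page
 * boundary.  sub_section[] maps each byte offset within the page (see
 * SUBPAGE_IDX) to a phys_sections[] index; subpage_read()/subpage_write()
 * forward the access to the owning address space at base + offset.
 */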
108

    
109
static MemoryRegionSection *phys_sections;
110
static unsigned phys_sections_nb, phys_sections_nb_alloc;
111
static uint16_t phys_section_unassigned;
112
static uint16_t phys_section_notdirty;
113
static uint16_t phys_section_rom;
114
static uint16_t phys_section_watch;
115

    
116
/* Simple allocator for PhysPageEntry nodes */
117
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
118
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
119

    
120
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
121

    
122
static void io_mem_init(void);
123
static void memory_map_init(void);
124
static void *qemu_safe_ram_ptr(ram_addr_t addr);
125

    
126
static MemoryRegion io_mem_watch;
127
#endif
128

    
129
#if !defined(CONFIG_USER_ONLY)
130

    
131
static void phys_map_node_reserve(unsigned nodes)
132
{
133
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
134
        typedef PhysPageEntry Node[L2_SIZE];
135
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
136
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
137
                                      phys_map_nodes_nb + nodes);
138
        phys_map_nodes = g_renew(Node, phys_map_nodes,
139
                                 phys_map_nodes_nb_alloc);
140
    }
141
}
142

    
143
static uint16_t phys_map_node_alloc(void)
144
{
145
    unsigned i;
146
    uint16_t ret;
147

    
148
    ret = phys_map_nodes_nb++;
149
    assert(ret != PHYS_MAP_NODE_NIL);
150
    assert(ret != phys_map_nodes_nb_alloc);
151
    for (i = 0; i < L2_SIZE; ++i) {
152
        phys_map_nodes[ret][i].is_leaf = 0;
153
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
154
    }
155
    return ret;
156
}
157

    
158
static void phys_map_nodes_reset(void)
159
{
160
    phys_map_nodes_nb = 0;
161
}
162

    
163

    
164
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
165
                                hwaddr *nb, uint16_t leaf,
166
                                int level)
167
{
168
    PhysPageEntry *p;
169
    int i;
170
    hwaddr step = (hwaddr)1 << (level * L2_BITS);
171

    
172
    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
173
        lp->ptr = phys_map_node_alloc();
174
        p = phys_map_nodes[lp->ptr];
175
        if (level == 0) {
176
            for (i = 0; i < L2_SIZE; i++) {
177
                p[i].is_leaf = 1;
178
                p[i].ptr = phys_section_unassigned;
179
            }
180
        }
181
    } else {
182
        p = phys_map_nodes[lp->ptr];
183
    }
184
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
185

    
186
    while (*nb && lp < &p[L2_SIZE]) {
187
        if ((*index & (step - 1)) == 0 && *nb >= step) {
188
            lp->is_leaf = true;
189
            lp->ptr = leaf;
190
            *index += step;
191
            *nb -= step;
192
        } else {
193
            phys_page_set_level(lp, index, nb, leaf, level - 1);
194
        }
195
        ++lp;
196
    }
197
}
198

    
199
static void phys_page_set(AddressSpaceDispatch *d,
200
                          hwaddr index, hwaddr nb,
201
                          uint16_t leaf)
202
{
203
    /* Wildly overreserve - it doesn't matter much. */
204
    phys_map_node_reserve(3 * P_L2_LEVELS);
205

    
206
    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
207
}
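
/*
 * Typical use is to point every page of a section at a single phys_sections[]
 * slot, as register_multipage() does below:
 *
 *     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages,
 *                   phys_section_add(section));
 */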
208

    
209
static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
210
{
211
    PhysPageEntry lp = d->phys_map;
212
    PhysPageEntry *p;
213
    int i;
214

    
215
    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
216
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
217
            return &phys_sections[phys_section_unassigned];
218
        }
219
        p = phys_map_nodes[lp.ptr];
220
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
221
    }
222
    return &phys_sections[lp.ptr];
223
}
224

    
225
bool memory_region_is_unassigned(MemoryRegion *mr)
226
{
227
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
228
        && mr != &io_mem_watch;
229
}
230

    
231
static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
232
                                                        hwaddr addr,
233
                                                        bool resolve_subpage)
234
{
235
    MemoryRegionSection *section;
236
    subpage_t *subpage;
237

    
238
    section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
239
    if (resolve_subpage && section->mr->subpage) {
240
        subpage = container_of(section->mr, subpage_t, iomem);
241
        section = &phys_sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
242
    }
243
    return section;
244
}
245

    
246
static MemoryRegionSection *
247
address_space_translate_internal(AddressSpace *as, hwaddr addr, hwaddr *xlat,
248
                                 hwaddr *plen, bool resolve_subpage)
249
{
250
    MemoryRegionSection *section;
251
    Int128 diff;
252

    
253
    section = address_space_lookup_region(as, addr, resolve_subpage);
254
    /* Compute offset within MemoryRegionSection */
255
    addr -= section->offset_within_address_space;
256

    
257
    /* Compute offset within MemoryRegion */
258
    *xlat = addr + section->offset_within_region;
259

    
260
    diff = int128_sub(section->mr->size, int128_make64(addr));
261
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
262
    return section;
263
}
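
/*
 * address_space_translate_internal() leaves *xlat holding the offset of addr
 * within the returned section's MemoryRegion and clamps *plen so that the
 * access does not run past the end of that section.
 */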
264

    
265
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
266
                                      hwaddr *xlat, hwaddr *plen,
267
                                      bool is_write)
268
{
269
    return address_space_translate_internal(as, addr, xlat, plen, true)->mr;
270
}
271

    
272
MemoryRegionSection *
273
address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
274
                                  hwaddr *plen)
275
{
276
    return address_space_translate_internal(as, addr, xlat, plen, false);
277
}
278
#endif
279

    
280
void cpu_exec_init_all(void)
281
{
282
#if !defined(CONFIG_USER_ONLY)
283
    qemu_mutex_init(&ram_list.mutex);
284
    memory_map_init();
285
    io_mem_init();
286
#endif
287
}
288

    
289
#if !defined(CONFIG_USER_ONLY)
290

    
291
static int cpu_common_post_load(void *opaque, int version_id)
292
{
293
    CPUState *cpu = opaque;
294

    
295
    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
296
       version_id is increased. */
297
    cpu->interrupt_request &= ~0x01;
298
    tlb_flush(cpu->env_ptr, 1);
299

    
300
    return 0;
301
}
302

    
303
static const VMStateDescription vmstate_cpu_common = {
304
    .name = "cpu_common",
305
    .version_id = 1,
306
    .minimum_version_id = 1,
307
    .minimum_version_id_old = 1,
308
    .post_load = cpu_common_post_load,
309
    .fields      = (VMStateField []) {
310
        VMSTATE_UINT32(halted, CPUState),
311
        VMSTATE_UINT32(interrupt_request, CPUState),
312
        VMSTATE_END_OF_LIST()
313
    }
314
};
315
#else
316
#define vmstate_cpu_common vmstate_dummy
317
#endif
318

    
319
CPUState *qemu_get_cpu(int index)
320
{
321
    CPUArchState *env = first_cpu;
322
    CPUState *cpu = NULL;
323

    
324
    while (env) {
325
        cpu = ENV_GET_CPU(env);
326
        if (cpu->cpu_index == index) {
327
            break;
328
        }
329
        env = env->next_cpu;
330
    }
331

    
332
    return env ? cpu : NULL;
333
}
334

    
335
void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
336
{
337
    CPUArchState *env = first_cpu;
338

    
339
    while (env) {
340
        func(ENV_GET_CPU(env), data);
341
        env = env->next_cpu;
342
    }
343
}
344

    
345
void cpu_exec_init(CPUArchState *env)
346
{
347
    CPUState *cpu = ENV_GET_CPU(env);
348
    CPUClass *cc = CPU_GET_CLASS(cpu);
349
    CPUArchState **penv;
350
    int cpu_index;
351

    
352
#if defined(CONFIG_USER_ONLY)
353
    cpu_list_lock();
354
#endif
355
    env->next_cpu = NULL;
356
    penv = &first_cpu;
357
    cpu_index = 0;
358
    while (*penv != NULL) {
359
        penv = &(*penv)->next_cpu;
360
        cpu_index++;
361
    }
362
    cpu->cpu_index = cpu_index;
363
    cpu->numa_node = 0;
364
    QTAILQ_INIT(&env->breakpoints);
365
    QTAILQ_INIT(&env->watchpoints);
366
#ifndef CONFIG_USER_ONLY
367
    cpu->thread_id = qemu_get_thread_id();
368
#endif
369
    *penv = env;
370
#if defined(CONFIG_USER_ONLY)
371
    cpu_list_unlock();
372
#endif
373
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
374
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
375
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
376
                    cpu_save, cpu_load, env);
377
    assert(cc->vmsd == NULL);
378
#endif
379
    if (cc->vmsd != NULL) {
380
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
381
    }
382
}
383

    
384
#if defined(TARGET_HAS_ICE)
385
#if defined(CONFIG_USER_ONLY)
386
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
387
{
388
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
389
}
390
#else
391
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
392
{
393
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
394
            (pc & ~TARGET_PAGE_MASK));
395
}
396
#endif
397
#endif /* TARGET_HAS_ICE */
398

    
399
#if defined(CONFIG_USER_ONLY)
400
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}
404

    
405
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
406
                          int flags, CPUWatchpoint **watchpoint)
407
{
408
    return -ENOSYS;
409
}
410
#else
411
/* Add a watchpoint.  */
412
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
413
                          int flags, CPUWatchpoint **watchpoint)
414
{
415
    target_ulong len_mask = ~(len - 1);
416
    CPUWatchpoint *wp;
417

    
418
    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
419
    if ((len & (len - 1)) || (addr & ~len_mask) ||
420
            len == 0 || len > TARGET_PAGE_SIZE) {
421
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
422
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
423
        return -EINVAL;
424
    }
425
    wp = g_malloc(sizeof(*wp));
426

    
427
    wp->vaddr = addr;
428
    wp->len_mask = len_mask;
429
    wp->flags = flags;
430

    
431
    /* keep all GDB-injected watchpoints in front */
432
    if (flags & BP_GDB)
433
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
434
    else
435
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
436

    
437
    tlb_flush_page(env, addr);
438

    
439
    if (watchpoint)
440
        *watchpoint = wp;
441
    return 0;
442
}
443

    
444
/* Remove a specific watchpoint.  */
445
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
446
                          int flags)
447
{
448
    target_ulong len_mask = ~(len - 1);
449
    CPUWatchpoint *wp;
450

    
451
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
452
        if (addr == wp->vaddr && len_mask == wp->len_mask
453
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
454
            cpu_watchpoint_remove_by_ref(env, wp);
455
            return 0;
456
        }
457
    }
458
    return -ENOENT;
459
}
460

    
461
/* Remove a specific watchpoint by reference.  */
462
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
463
{
464
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
465

    
466
    tlb_flush_page(env, watchpoint->vaddr);
467

    
468
    g_free(watchpoint);
469
}
470

    
471
/* Remove all matching watchpoints.  */
472
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
473
{
474
    CPUWatchpoint *wp, *next;
475

    
476
    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
477
        if (wp->flags & mask)
478
            cpu_watchpoint_remove_by_ref(env, wp);
479
    }
480
}
481
#endif
482

    
483
/* Add a breakpoint.  */
484
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
485
                          CPUBreakpoint **breakpoint)
486
{
487
#if defined(TARGET_HAS_ICE)
488
    CPUBreakpoint *bp;
489

    
490
    bp = g_malloc(sizeof(*bp));
491

    
492
    bp->pc = pc;
493
    bp->flags = flags;
494

    
495
    /* keep all GDB-injected breakpoints in front */
496
    if (flags & BP_GDB)
497
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
498
    else
499
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
500

    
501
    breakpoint_invalidate(env, pc);
502

    
503
    if (breakpoint)
504
        *breakpoint = bp;
505
    return 0;
506
#else
507
    return -ENOSYS;
508
#endif
509
}
510

    
511
/* Remove a specific breakpoint.  */
512
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
513
{
514
#if defined(TARGET_HAS_ICE)
515
    CPUBreakpoint *bp;
516

    
517
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
518
        if (bp->pc == pc && bp->flags == flags) {
519
            cpu_breakpoint_remove_by_ref(env, bp);
520
            return 0;
521
        }
522
    }
523
    return -ENOENT;
524
#else
525
    return -ENOSYS;
526
#endif
527
}
528

    
529
/* Remove a specific breakpoint by reference.  */
530
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
531
{
532
#if defined(TARGET_HAS_ICE)
533
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
534

    
535
    breakpoint_invalidate(env, breakpoint->pc);
536

    
537
    g_free(breakpoint);
538
#endif
539
}
540

    
541
/* Remove all matching breakpoints. */
542
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
543
{
544
#if defined(TARGET_HAS_ICE)
545
    CPUBreakpoint *bp, *next;
546

    
547
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
548
        if (bp->flags & mask)
549
            cpu_breakpoint_remove_by_ref(env, bp);
550
    }
551
#endif
552
}
553

    
554
/* enable or disable single step mode. EXCP_DEBUG is returned by the
555
   CPU loop after each instruction */
556
void cpu_single_step(CPUArchState *env, int enabled)
557
{
558
#if defined(TARGET_HAS_ICE)
559
    if (env->singlestep_enabled != enabled) {
560
        env->singlestep_enabled = enabled;
561
        if (kvm_enabled())
562
            kvm_update_guest_debug(env, 0);
563
        else {
564
            /* must flush all the translated code to avoid inconsistencies */
565
            /* XXX: only flush what is necessary */
566
            tb_flush(env);
567
        }
568
    }
569
#endif
570
}
571

    
572
void cpu_exit(CPUArchState *env)
573
{
574
    CPUState *cpu = ENV_GET_CPU(env);
575

    
576
    cpu->exit_request = 1;
577
    cpu->tcg_exit_req = 1;
578
}
579

    
580
void cpu_abort(CPUArchState *env, const char *fmt, ...)
581
{
582
    va_list ap;
583
    va_list ap2;
584

    
585
    va_start(ap, fmt);
586
    va_copy(ap2, ap);
587
    fprintf(stderr, "qemu: fatal: ");
588
    vfprintf(stderr, fmt, ap);
589
    fprintf(stderr, "\n");
590
    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
591
    if (qemu_log_enabled()) {
592
        qemu_log("qemu: fatal: ");
593
        qemu_log_vprintf(fmt, ap2);
594
        qemu_log("\n");
595
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
596
        qemu_log_flush();
597
        qemu_log_close();
598
    }
599
    va_end(ap2);
600
    va_end(ap);
601
#if defined(CONFIG_USER_ONLY)
602
    {
603
        struct sigaction act;
604
        sigfillset(&act.sa_mask);
605
        act.sa_handler = SIG_DFL;
606
        sigaction(SIGABRT, &act, NULL);
607
    }
608
#endif
609
    abort();
610
}
611

    
612
CPUArchState *cpu_copy(CPUArchState *env)
613
{
614
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
615
    CPUArchState *next_cpu = new_env->next_cpu;
616
#if defined(TARGET_HAS_ICE)
617
    CPUBreakpoint *bp;
618
    CPUWatchpoint *wp;
619
#endif
620

    
621
    memcpy(new_env, env, sizeof(CPUArchState));
622

    
623
    /* Preserve chaining. */
624
    new_env->next_cpu = next_cpu;
625

    
626
    /* Clone all break/watchpoints.
627
       Note: Once we support ptrace with hw-debug register access, make sure
628
       BP_CPU break/watchpoints are handled correctly on clone. */
629
    QTAILQ_INIT(&env->breakpoints);
630
    QTAILQ_INIT(&env->watchpoints);
631
#if defined(TARGET_HAS_ICE)
632
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
633
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
634
    }
635
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
636
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
637
                              wp->flags, NULL);
638
    }
639
#endif
640

    
641
    return new_env;
642
}
643

    
644
#if !defined(CONFIG_USER_ONLY)
645
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
646
                                      uintptr_t length)
647
{
648
    uintptr_t start1;
649

    
650
    /* we modify the TLB cache so that the dirty bit will be set again
651
       when accessing the range */
652
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
653
    /* Check that we don't span multiple blocks - this breaks the
654
       address comparisons below.  */
655
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
656
            != (end - 1) - start) {
657
        abort();
658
    }
659
    cpu_tlb_reset_dirty_all(start1, length);
660

    
661
}
662

    
663
/* Note: start and end must be within the same ram block.  */
664
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
665
                                     int dirty_flags)
666
{
667
    uintptr_t length;
668

    
669
    start &= TARGET_PAGE_MASK;
670
    end = TARGET_PAGE_ALIGN(end);
671

    
672
    length = end - start;
673
    if (length == 0)
674
        return;
675
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
676

    
677
    if (tcg_enabled()) {
678
        tlb_reset_dirty_range_all(start, end, length);
679
    }
680
}
681

    
682
static int cpu_physical_memory_set_dirty_tracking(int enable)
683
{
684
    int ret = 0;
685
    in_migration = enable;
686
    return ret;
687
}
688

    
689
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
690
                                       MemoryRegionSection *section,
691
                                       target_ulong vaddr,
692
                                       hwaddr paddr, hwaddr xlat,
693
                                       int prot,
694
                                       target_ulong *address)
695
{
696
    hwaddr iotlb;
697
    CPUWatchpoint *wp;
698

    
699
    if (memory_region_is_ram(section->mr)) {
700
        /* Normal RAM.  */
701
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
702
            + xlat;
703
        if (!section->readonly) {
704
            iotlb |= phys_section_notdirty;
705
        } else {
706
            iotlb |= phys_section_rom;
707
        }
708
    } else {
709
        iotlb = section - phys_sections;
710
        iotlb += xlat;
711
    }
712

    
713
    /* Make accesses to pages with watchpoints go via the
714
       watchpoint trap routines.  */
715
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
716
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
717
            /* Avoid trapping reads of pages with a write breakpoint. */
718
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
719
                iotlb = phys_section_watch + paddr;
720
                *address |= TLB_MMIO;
721
                break;
722
            }
723
        }
724
    }
725

    
726
    return iotlb;
727
}
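
/*
 * The iotlb value built above is overloaded: for RAM it is the page-aligned
 * ram_addr plus xlat, ORed with phys_section_notdirty (writable) or
 * phys_section_rom (read-only); for everything else it is the phys_sections[]
 * index of the section plus xlat.  Pages with an armed watchpoint are
 * redirected to phys_section_watch and flagged TLB_MMIO so that every access
 * traps.
 */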
728
#endif /* !defined(CONFIG_USER_ONLY) */
729

    
730
#if !defined(CONFIG_USER_ONLY)
731

    
732
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
733
                             uint16_t section);
734
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
735
static void destroy_page_desc(uint16_t section_index)
736
{
737
    MemoryRegionSection *section = &phys_sections[section_index];
738
    MemoryRegion *mr = section->mr;
739

    
740
    if (mr->subpage) {
741
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
742
        memory_region_destroy(&subpage->iomem);
743
        g_free(subpage);
744
    }
745
}
746

    
747
static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
748
{
749
    unsigned i;
750
    PhysPageEntry *p;
751

    
752
    if (lp->ptr == PHYS_MAP_NODE_NIL) {
753
        return;
754
    }
755

    
756
    p = phys_map_nodes[lp->ptr];
757
    for (i = 0; i < L2_SIZE; ++i) {
758
        if (!p[i].is_leaf) {
759
            destroy_l2_mapping(&p[i], level - 1);
760
        } else {
761
            destroy_page_desc(p[i].ptr);
762
        }
763
    }
764
    lp->is_leaf = 0;
765
    lp->ptr = PHYS_MAP_NODE_NIL;
766
}
767

    
768
static void destroy_all_mappings(AddressSpaceDispatch *d)
769
{
770
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
771
    phys_map_nodes_reset();
772
}
773

    
774
static uint16_t phys_section_add(MemoryRegionSection *section)
775
{
776
    /* The physical section number is ORed with a page-aligned
777
     * pointer to produce the iotlb entries.  Thus it should
778
     * never overflow into the page-aligned value.
779
     */
780
    assert(phys_sections_nb < TARGET_PAGE_SIZE);
781

    
782
    if (phys_sections_nb == phys_sections_nb_alloc) {
783
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
784
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
785
                                phys_sections_nb_alloc);
786
    }
787
    phys_sections[phys_sections_nb] = *section;
788
    return phys_sections_nb++;
789
}
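
/*
 * Keeping phys_sections_nb below TARGET_PAGE_SIZE (asserted above) guarantees
 * that a section index fits in the sub-page bits of an iotlb entry, so
 * iotlb_to_region() can recover it with index & ~TARGET_PAGE_MASK.
 */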
790

    
791
static void phys_sections_clear(void)
792
{
793
    phys_sections_nb = 0;
794
}
795

    
796
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
797
{
798
    subpage_t *subpage;
799
    hwaddr base = section->offset_within_address_space
800
        & TARGET_PAGE_MASK;
801
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
802
    MemoryRegionSection subsection = {
803
        .offset_within_address_space = base,
804
        .size = int128_make64(TARGET_PAGE_SIZE),
805
    };
806
    hwaddr start, end;
807

    
808
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
809

    
810
    if (!(existing->mr->subpage)) {
811
        subpage = subpage_init(d->as, base);
812
        subsection.mr = &subpage->iomem;
813
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
814
                      phys_section_add(&subsection));
815
    } else {
816
        subpage = container_of(existing->mr, subpage_t, iomem);
817
    }
818
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
819
    end = start + int128_get64(section->size) - 1;
820
    subpage_register(subpage, start, end, phys_section_add(section));
821
}
822

    
823

    
824
static void register_multipage(AddressSpaceDispatch *d,
825
                               MemoryRegionSection *section)
826
{
827
    hwaddr start_addr = section->offset_within_address_space;
828
    uint16_t section_index = phys_section_add(section);
829
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
830
                                                    TARGET_PAGE_BITS));
831

    
832
    assert(num_pages);
833
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
834
}
835

    
836
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
837
{
838
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
839
    MemoryRegionSection now = *section, remain = *section;
840
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
841

    
842
    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
843
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
844
                       - now.offset_within_address_space;
845

    
846
        now.size = int128_min(int128_make64(left), now.size);
847
        register_subpage(d, &now);
848
    } else {
849
        now.size = int128_zero();
850
    }
851
    while (int128_ne(remain.size, now.size)) {
852
        remain.size = int128_sub(remain.size, now.size);
853
        remain.offset_within_address_space += int128_get64(now.size);
854
        remain.offset_within_region += int128_get64(now.size);
855
        now = remain;
856
        if (int128_lt(remain.size, page_size)) {
857
            register_subpage(d, &now);
858
        } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
859
            now.size = page_size;
860
            register_subpage(d, &now);
861
        } else {
862
            now.size = int128_and(now.size, int128_neg(page_size));
863
            register_multipage(d, &now);
864
        }
865
    }
866
}
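
/*
 * mem_add() splits an incoming section into up to three parts: a head that is
 * not page aligned, a page-aligned middle, and a tail shorter than a page.
 * Head and tail are routed through register_subpage(), the middle through
 * register_multipage().
 */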
867

    
868
void qemu_flush_coalesced_mmio_buffer(void)
869
{
870
    if (kvm_enabled())
871
        kvm_flush_coalesced_mmio_buffer();
872
}
873

    
874
void qemu_mutex_lock_ramlist(void)
875
{
876
    qemu_mutex_lock(&ram_list.mutex);
877
}
878

    
879
void qemu_mutex_unlock_ramlist(void)
880
{
881
    qemu_mutex_unlock(&ram_list.mutex);
882
}
883

    
884
#if defined(__linux__) && !defined(TARGET_S390X)
885

    
886
#include <sys/vfs.h>
887

    
888
#define HUGETLBFS_MAGIC       0x958458f6
889

    
890
static long gethugepagesize(const char *path)
891
{
892
    struct statfs fs;
893
    int ret;
894

    
895
    do {
896
        ret = statfs(path, &fs);
897
    } while (ret != 0 && errno == EINTR);
898

    
899
    if (ret != 0) {
900
        perror(path);
901
        return 0;
902
    }
903

    
904
    if (fs.f_type != HUGETLBFS_MAGIC)
905
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
906

    
907
    return fs.f_bsize;
908
}
909

    
910
static void *file_ram_alloc(RAMBlock *block,
911
                            ram_addr_t memory,
912
                            const char *path)
913
{
914
    char *filename;
915
    char *sanitized_name;
916
    char *c;
917
    void *area;
918
    int fd;
919
#ifdef MAP_POPULATE
920
    int flags;
921
#endif
922
    unsigned long hpagesize;
923

    
924
    hpagesize = gethugepagesize(path);
925
    if (!hpagesize) {
926
        return NULL;
927
    }
928

    
929
    if (memory < hpagesize) {
930
        return NULL;
931
    }
932

    
933
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
934
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
935
        return NULL;
936
    }
937

    
938
    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
939
    sanitized_name = g_strdup(block->mr->name);
940
    for (c = sanitized_name; *c != '\0'; c++) {
941
        if (*c == '/')
942
            *c = '_';
943
    }
944

    
945
    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
946
                               sanitized_name);
947
    g_free(sanitized_name);
948

    
949
    fd = mkstemp(filename);
950
    if (fd < 0) {
951
        perror("unable to create backing store for hugepages");
952
        g_free(filename);
953
        return NULL;
954
    }
955
    unlink(filename);
956
    g_free(filename);
957

    
958
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
959

    
960
    /*
961
     * ftruncate is not supported by hugetlbfs in older
962
     * hosts, so don't bother bailing out on errors.
963
     * If anything goes wrong with it under other filesystems,
964
     * mmap will fail.
965
     */
966
    if (ftruncate(fd, memory))
967
        perror("ftruncate");
968

    
969
#ifdef MAP_POPULATE
970
    /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
974
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
975
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
976
#else
977
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
978
#endif
979
    if (area == MAP_FAILED) {
980
        perror("file_ram_alloc: can't mmap RAM pages");
981
        close(fd);
982
        return (NULL);
983
    }
984
    block->fd = fd;
985
    return area;
986
}
987
#endif
988

    
989
static ram_addr_t find_ram_offset(ram_addr_t size)
990
{
991
    RAMBlock *block, *next_block;
992
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
993

    
994
    assert(size != 0); /* it would hand out same offset multiple times */
995

    
996
    if (QTAILQ_EMPTY(&ram_list.blocks))
997
        return 0;
998

    
999
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1000
        ram_addr_t end, next = RAM_ADDR_MAX;
1001

    
1002
        end = block->offset + block->length;
1003

    
1004
        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1005
            if (next_block->offset >= end) {
1006
                next = MIN(next, next_block->offset);
1007
            }
1008
        }
1009
        if (next - end >= size && next - end < mingap) {
1010
            offset = end;
1011
            mingap = next - end;
1012
        }
1013
    }
1014

    
1015
    if (offset == RAM_ADDR_MAX) {
1016
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1017
                (uint64_t)size);
1018
        abort();
1019
    }
1020

    
1021
    return offset;
1022
}
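
/*
 * find_ram_offset() is a best-fit allocator: for every existing block it
 * measures the gap up to the closest following block and returns the end of
 * the smallest gap that still holds the requested size.
 */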
1023

    
1024
ram_addr_t last_ram_offset(void)
1025
{
1026
    RAMBlock *block;
1027
    ram_addr_t last = 0;
1028

    
1029
    QTAILQ_FOREACH(block, &ram_list.blocks, next)
1030
        last = MAX(last, block->offset + block->length);
1031

    
1032
    return last;
1033
}
1034

    
1035
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1036
{
1037
    int ret;
1038
    QemuOpts *machine_opts;
1039

    
1040
    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1041
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1042
    if (machine_opts &&
1043
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1044
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1045
        if (ret) {
1046
            perror("qemu_madvise");
1047
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1048
                            "but dump_guest_core=off specified\n");
1049
        }
1050
    }
1051
}
1052

    
1053
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1054
{
1055
    RAMBlock *new_block, *block;
1056

    
1057
    new_block = NULL;
1058
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1059
        if (block->offset == addr) {
1060
            new_block = block;
1061
            break;
1062
        }
1063
    }
1064
    assert(new_block);
1065
    assert(!new_block->idstr[0]);
1066

    
1067
    if (dev) {
1068
        char *id = qdev_get_dev_path(dev);
1069
        if (id) {
1070
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1071
            g_free(id);
1072
        }
1073
    }
1074
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1075

    
1076
    /* This assumes the iothread lock is taken here too.  */
1077
    qemu_mutex_lock_ramlist();
1078
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1079
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1080
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1081
                    new_block->idstr);
1082
            abort();
1083
        }
1084
    }
1085
    qemu_mutex_unlock_ramlist();
1086
}
1087

    
1088
static int memory_try_enable_merging(void *addr, size_t len)
1089
{
1090
    QemuOpts *opts;
1091

    
1092
    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1093
    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1094
        /* disabled by the user */
1095
        return 0;
1096
    }
1097

    
1098
    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1099
}
1100

    
1101
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1102
                                   MemoryRegion *mr)
1103
{
1104
    RAMBlock *block, *new_block;
1105

    
1106
    size = TARGET_PAGE_ALIGN(size);
1107
    new_block = g_malloc0(sizeof(*new_block));
1108

    
1109
    /* This assumes the iothread lock is taken here too.  */
1110
    qemu_mutex_lock_ramlist();
1111
    new_block->mr = mr;
1112
    new_block->offset = find_ram_offset(size);
1113
    if (host) {
1114
        new_block->host = host;
1115
        new_block->flags |= RAM_PREALLOC_MASK;
1116
    } else {
1117
        if (mem_path) {
1118
#if defined (__linux__) && !defined(TARGET_S390X)
1119
            new_block->host = file_ram_alloc(new_block, size, mem_path);
1120
            if (!new_block->host) {
1121
                new_block->host = qemu_anon_ram_alloc(size);
1122
                memory_try_enable_merging(new_block->host, size);
1123
            }
1124
#else
1125
            fprintf(stderr, "-mem-path option unsupported\n");
1126
            exit(1);
1127
#endif
1128
        } else {
1129
            if (xen_enabled()) {
1130
                xen_ram_alloc(new_block->offset, size, mr);
1131
            } else if (kvm_enabled()) {
1132
                /* some s390/kvm configurations have special constraints */
1133
                new_block->host = kvm_ram_alloc(size);
1134
            } else {
1135
                new_block->host = qemu_anon_ram_alloc(size);
1136
            }
1137
            memory_try_enable_merging(new_block->host, size);
1138
        }
1139
    }
1140
    new_block->length = size;
1141

    
1142
    /* Keep the list sorted from biggest to smallest block.  */
1143
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1144
        if (block->length < new_block->length) {
1145
            break;
1146
        }
1147
    }
1148
    if (block) {
1149
        QTAILQ_INSERT_BEFORE(block, new_block, next);
1150
    } else {
1151
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1152
    }
1153
    ram_list.mru_block = NULL;
1154

    
1155
    ram_list.version++;
1156
    qemu_mutex_unlock_ramlist();
1157

    
1158
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1159
                                       last_ram_offset() >> TARGET_PAGE_BITS);
1160
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1161
           0, size >> TARGET_PAGE_BITS);
1162
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1163

    
1164
    qemu_ram_setup_dump(new_block->host, size);
1165
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1166

    
1167
    if (kvm_enabled())
1168
        kvm_setup_guest_memory(new_block->host, size);
1169

    
1170
    return new_block->offset;
1171
}
1172

    
1173
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1174
{
1175
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
1176
}
1177

    
1178
void qemu_ram_free_from_ptr(ram_addr_t addr)
1179
{
1180
    RAMBlock *block;
1181

    
1182
    /* This assumes the iothread lock is taken here too.  */
1183
    qemu_mutex_lock_ramlist();
1184
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1185
        if (addr == block->offset) {
1186
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1187
            ram_list.mru_block = NULL;
1188
            ram_list.version++;
1189
            g_free(block);
1190
            break;
1191
        }
1192
    }
1193
    qemu_mutex_unlock_ramlist();
1194
}
1195

    
1196
void qemu_ram_free(ram_addr_t addr)
1197
{
1198
    RAMBlock *block;
1199

    
1200
    /* This assumes the iothread lock is taken here too.  */
1201
    qemu_mutex_lock_ramlist();
1202
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1203
        if (addr == block->offset) {
1204
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1205
            ram_list.mru_block = NULL;
1206
            ram_list.version++;
1207
            if (block->flags & RAM_PREALLOC_MASK) {
1208
                ;
1209
            } else if (mem_path) {
1210
#if defined (__linux__) && !defined(TARGET_S390X)
1211
                if (block->fd) {
1212
                    munmap(block->host, block->length);
1213
                    close(block->fd);
1214
                } else {
1215
                    qemu_anon_ram_free(block->host, block->length);
1216
                }
1217
#else
1218
                abort();
1219
#endif
1220
            } else {
1221
                if (xen_enabled()) {
1222
                    xen_invalidate_map_cache_entry(block->host);
1223
                } else {
1224
                    qemu_anon_ram_free(block->host, block->length);
1225
                }
1226
            }
1227
            g_free(block);
1228
            break;
1229
        }
1230
    }
1231
    qemu_mutex_unlock_ramlist();
1232

    
1233
}
1234

    
1235
#ifndef _WIN32
1236
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1237
{
1238
    RAMBlock *block;
1239
    ram_addr_t offset;
1240
    int flags;
1241
    void *area, *vaddr;
1242

    
1243
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1244
        offset = addr - block->offset;
1245
        if (offset < block->length) {
1246
            vaddr = block->host + offset;
1247
            if (block->flags & RAM_PREALLOC_MASK) {
1248
                ;
1249
            } else {
1250
                flags = MAP_FIXED;
1251
                munmap(vaddr, length);
1252
                if (mem_path) {
1253
#if defined(__linux__) && !defined(TARGET_S390X)
1254
                    if (block->fd) {
1255
#ifdef MAP_POPULATE
1256
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1257
                            MAP_PRIVATE;
1258
#else
1259
                        flags |= MAP_PRIVATE;
1260
#endif
1261
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1262
                                    flags, block->fd, offset);
1263
                    } else {
1264
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1265
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1266
                                    flags, -1, 0);
1267
                    }
1268
#else
1269
                    abort();
1270
#endif
1271
                } else {
1272
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
1273
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
1274
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1275
                                flags, -1, 0);
1276
#else
1277
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1278
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1279
                                flags, -1, 0);
1280
#endif
1281
                }
1282
                if (area != vaddr) {
1283
                    fprintf(stderr, "Could not remap addr: "
1284
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1285
                            length, addr);
1286
                    exit(1);
1287
                }
1288
                memory_try_enable_merging(vaddr, length);
1289
                qemu_ram_setup_dump(vaddr, length);
1290
            }
1291
            return;
1292
        }
1293
    }
1294
}
1295
#endif /* !_WIN32 */
1296

    
1297
/* Return a host pointer to ram allocated with qemu_ram_alloc.
1298
   With the exception of the softmmu code in this file, this should
1299
   only be used for local memory (e.g. video ram) that the device owns,
1300
   and knows it isn't going to access beyond the end of the block.
1301

1302
   It should not be used for general purpose DMA.
1303
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1304
 */
1305
void *qemu_get_ram_ptr(ram_addr_t addr)
1306
{
1307
    RAMBlock *block;
1308

    
1309
    /* The list is protected by the iothread lock here.  */
1310
    block = ram_list.mru_block;
1311
    if (block && addr - block->offset < block->length) {
1312
        goto found;
1313
    }
1314
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1315
        if (addr - block->offset < block->length) {
1316
            goto found;
1317
        }
1318
    }
1319

    
1320
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1321
    abort();
1322

    
1323
found:
1324
    ram_list.mru_block = block;
1325
    if (xen_enabled()) {
1326
        /* We need to check if the requested address is in the RAM
1327
         * because we don't want to map the entire memory in QEMU.
1328
         * In that case just map until the end of the page.
1329
         */
1330
        if (block->offset == 0) {
1331
            return xen_map_cache(addr, 0, 0);
1332
        } else if (block->host == NULL) {
1333
            block->host =
1334
                xen_map_cache(block->offset, block->length, 1);
1335
        }
1336
    }
1337
    return block->host + (addr - block->offset);
1338
}
1339

    
1340
/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1341
 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1342
 *
1343
 * ??? Is this still necessary?
1344
 */
1345
static void *qemu_safe_ram_ptr(ram_addr_t addr)
1346
{
1347
    RAMBlock *block;
1348

    
1349
    /* The list is protected by the iothread lock here.  */
1350
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1351
        if (addr - block->offset < block->length) {
1352
            if (xen_enabled()) {
1353
                /* We need to check if the requested address is in the RAM
1354
                 * because we don't want to map the entire memory in QEMU.
1355
                 * In that case just map until the end of the page.
1356
                 */
1357
                if (block->offset == 0) {
1358
                    return xen_map_cache(addr, 0, 0);
1359
                } else if (block->host == NULL) {
1360
                    block->host =
1361
                        xen_map_cache(block->offset, block->length, 1);
1362
                }
1363
            }
1364
            return block->host + (addr - block->offset);
1365
        }
1366
    }
1367

    
1368
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1369
    abort();
1370

    
1371
    return NULL;
1372
}
1373

    
1374
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1375
 * but takes a size argument */
1376
static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1377
{
1378
    if (*size == 0) {
1379
        return NULL;
1380
    }
1381
    if (xen_enabled()) {
1382
        return xen_map_cache(addr, *size, 1);
1383
    } else {
1384
        RAMBlock *block;
1385

    
1386
        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1387
            if (addr - block->offset < block->length) {
1388
                if (addr - block->offset + *size > block->length)
1389
                    *size = block->length - addr + block->offset;
1390
                return block->host + (addr - block->offset);
1391
            }
1392
        }
1393

    
1394
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1395
        abort();
1396
    }
1397
}
1398

    
1399
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1400
{
1401
    RAMBlock *block;
1402
    uint8_t *host = ptr;
1403

    
1404
    if (xen_enabled()) {
1405
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1406
        return 0;
1407
    }
1408

    
1409
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1410
        /* This can happen when the block is not mapped. */
1411
        if (block->host == NULL) {
1412
            continue;
1413
        }
1414
        if (host - block->host < block->length) {
1415
            *ram_addr = block->offset + (host - block->host);
1416
            return 0;
1417
        }
1418
    }
1419

    
1420
    return -1;
1421
}
1422

    
1423
/* Some of the softmmu routines need to translate from a host pointer
1424
   (typically a TLB entry) back to a ram offset.  */
1425
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1426
{
1427
    ram_addr_t ram_addr;
1428

    
1429
    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1430
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
1431
        abort();
1432
    }
1433
    return ram_addr;
1434
}
1435

    
1436
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1437
                               uint64_t val, unsigned size)
1438
{
1439
    int dirty_flags;
1440
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1441
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1442
        tb_invalidate_phys_page_fast(ram_addr, size);
1443
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1444
    }
1445
    switch (size) {
1446
    case 1:
1447
        stb_p(qemu_get_ram_ptr(ram_addr), val);
1448
        break;
1449
    case 2:
1450
        stw_p(qemu_get_ram_ptr(ram_addr), val);
1451
        break;
1452
    case 4:
1453
        stl_p(qemu_get_ram_ptr(ram_addr), val);
1454
        break;
1455
    default:
1456
        abort();
1457
    }
1458
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1459
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1460
    /* we remove the notdirty callback only if the code has been
1461
       flushed */
1462
    if (dirty_flags == 0xff)
1463
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1464
}
1465

    
1466
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1467
                                 unsigned size, bool is_write)
1468
{
1469
    return is_write;
1470
}
1471

    
1472
static const MemoryRegionOps notdirty_mem_ops = {
1473
    .write = notdirty_mem_write,
1474
    .valid.accepts = notdirty_mem_accepts,
1475
    .endianness = DEVICE_NATIVE_ENDIAN,
1476
};
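
/*
 * TLB entries for pages whose CODE_DIRTY_FLAG is clear point at
 * io_mem_notdirty, so the first write to such a page lands in
 * notdirty_mem_write() above: the translated code for the page is
 * invalidated, the store is performed, and the dirty flags are set.  Once the
 * flags reach 0xff the TLB entry is switched back to a plain RAM mapping via
 * tlb_set_dirty().
 */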
1477

    
1478
/* Generate a debug exception if a watchpoint has been hit.  */
1479
static void check_watchpoint(int offset, int len_mask, int flags)
1480
{
1481
    CPUArchState *env = cpu_single_env;
1482
    target_ulong pc, cs_base;
1483
    target_ulong vaddr;
1484
    CPUWatchpoint *wp;
1485
    int cpu_flags;
1486

    
1487
    if (env->watchpoint_hit) {
1488
        /* We re-entered the check after replacing the TB. Now raise
1489
         * the debug interrupt so that it will trigger after the
1490
         * current instruction. */
1491
        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1492
        return;
1493
    }
1494
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1495
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1496
        if ((vaddr == (wp->vaddr & len_mask) ||
1497
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1498
            wp->flags |= BP_WATCHPOINT_HIT;
1499
            if (!env->watchpoint_hit) {
1500
                env->watchpoint_hit = wp;
1501
                tb_check_watchpoint(env);
1502
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1503
                    env->exception_index = EXCP_DEBUG;
1504
                    cpu_loop_exit(env);
1505
                } else {
1506
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1507
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1508
                    cpu_resume_from_signal(env, NULL);
1509
                }
1510
            }
1511
        } else {
1512
            wp->flags &= ~BP_WATCHPOINT_HIT;
1513
        }
1514
    }
1515
}
1516

    
1517
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1518
   so these check for a hit then pass through to the normal out-of-line
1519
   phys routines.  */
1520
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1521
                               unsigned size)
1522
{
1523
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1524
    switch (size) {
1525
    case 1: return ldub_phys(addr);
1526
    case 2: return lduw_phys(addr);
1527
    case 4: return ldl_phys(addr);
1528
    default: abort();
1529
    }
1530
}
1531

    
1532
static void watch_mem_write(void *opaque, hwaddr addr,
1533
                            uint64_t val, unsigned size)
1534
{
1535
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1536
    switch (size) {
1537
    case 1:
1538
        stb_phys(addr, val);
1539
        break;
1540
    case 2:
1541
        stw_phys(addr, val);
1542
        break;
1543
    case 4:
1544
        stl_phys(addr, val);
1545
        break;
1546
    default: abort();
1547
    }
1548
}
1549

    
1550
static const MemoryRegionOps watch_mem_ops = {
1551
    .read = watch_mem_read,
1552
    .write = watch_mem_write,
1553
    .endianness = DEVICE_NATIVE_ENDIAN,
1554
};
1555

    
1556
static uint64_t subpage_read(void *opaque, hwaddr addr,
1557
                             unsigned len)
1558
{
1559
    subpage_t *subpage = opaque;
1560
    uint8_t buf[4];
1561

    
1562
#if defined(DEBUG_SUBPAGE)
1563
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
1564
           subpage, len, addr);
1565
#endif
1566
    address_space_read(subpage->as, addr + subpage->base, buf, len);
1567
    switch (len) {
1568
    case 1:
1569
        return ldub_p(buf);
1570
    case 2:
1571
        return lduw_p(buf);
1572
    case 4:
1573
        return ldl_p(buf);
1574
    default:
1575
        abort();
1576
    }
1577
}
1578

    
1579
static void subpage_write(void *opaque, hwaddr addr,
1580
                          uint64_t value, unsigned len)
1581
{
1582
    subpage_t *subpage = opaque;
1583
    uint8_t buf[4];
1584

    
1585
#if defined(DEBUG_SUBPAGE)
1586
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1587
           " value %"PRIx64"\n",
1588
           __func__, subpage, len, addr, value);
1589
#endif
1590
    switch (len) {
1591
    case 1:
1592
        stb_p(buf, value);
1593
        break;
1594
    case 2:
1595
        stw_p(buf, value);
1596
        break;
1597
    case 4:
1598
        stl_p(buf, value);
1599
        break;
1600
    default:
1601
        abort();
1602
    }
1603
    address_space_write(subpage->as, addr + subpage->base, buf, len);
1604
}
1605

    
1606
static bool subpage_accepts(void *opaque, hwaddr addr,
1607
                            unsigned size, bool is_write)
1608
{
1609
    subpage_t *subpage = opaque;
1610
#if defined(DEBUG_SUBPAGE)
1611
    printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx "\n",
1612
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
1613
#endif
1614

    
1615
    return address_space_access_valid(subpage->as, addr + subpage->base,
1616
                                      size, is_write);
1617
}
1618

    
1619
static const MemoryRegionOps subpage_ops = {
1620
    .read = subpage_read,
1621
    .write = subpage_write,
1622
    .valid.accepts = subpage_accepts,
1623
    .endianness = DEVICE_NATIVE_ENDIAN,
1624
};
1625

    
1626
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1627
                             uint16_t section)
1628
{
1629
    int idx, eidx;
1630

    
1631
    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1632
        return -1;
1633
    idx = SUBPAGE_IDX(start);
1634
    eidx = SUBPAGE_IDX(end);
1635
#if defined(DEBUG_SUBPAGE)
1636
    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1637
           mmio, start, end, idx, eidx, memory);
1638
#endif
1639
    for (; idx <= eidx; idx++) {
1640
        mmio->sub_section[idx] = section;
1641
    }
1642

    
1643
    return 0;
1644
}
1645

    
1646
static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1647
{
1648
    subpage_t *mmio;
1649

    
1650
    mmio = g_malloc0(sizeof(subpage_t));
1651

    
1652
    mmio->as = as;
1653
    mmio->base = base;
1654
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1655
                          "subpage", TARGET_PAGE_SIZE);
1656
    mmio->iomem.subpage = true;
1657
#if defined(DEBUG_SUBPAGE)
1658
    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1659
           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1660
#endif
1661
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1662

    
1663
    return mmio;
1664
}
1665

    
1666
static uint16_t dummy_section(MemoryRegion *mr)
1667
{
1668
    MemoryRegionSection section = {
1669
        .mr = mr,
1670
        .offset_within_address_space = 0,
1671
        .offset_within_region = 0,
1672
        .size = int128_2_64(),
1673
    };
1674

    
1675
    return phys_section_add(&section);
1676
}
1677

    
1678
MemoryRegion *iotlb_to_region(hwaddr index)
1679
{
1680
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1681
}
1682

    
1683
static void io_mem_init(void)
1684
{
1685
    memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1686
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1687
                          "unassigned", UINT64_MAX);
1688
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1689
                          "notdirty", UINT64_MAX);
1690
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1691
                          "watch", UINT64_MAX);
1692
}
1693

    
1694
static void mem_begin(MemoryListener *listener)
1695
{
1696
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1697

    
1698
    destroy_all_mappings(d);
1699
    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1700
}
1701

    
1702
static void core_begin(MemoryListener *listener)
1703
{
1704
    phys_sections_clear();
1705
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
1706
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
1707
    phys_section_rom = dummy_section(&io_mem_rom);
1708
    phys_section_watch = dummy_section(&io_mem_watch);
1709
}
1710

    
1711
static void tcg_commit(MemoryListener *listener)
1712
{
1713
    CPUArchState *env;
1714

    
1715
    /* since each CPU stores ram addresses in its TLB cache, we must
1716
       reset the modified entries */
1717
    /* XXX: slow ! */
1718
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
1719
        tlb_flush(env, 1);
1720
    }
1721
}
1722

    
1723
static void core_log_global_start(MemoryListener *listener)
1724
{
1725
    cpu_physical_memory_set_dirty_tracking(1);
1726
}
1727

    
1728
static void core_log_global_stop(MemoryListener *listener)
1729
{
1730
    cpu_physical_memory_set_dirty_tracking(0);
1731
}
1732

    
1733
static void io_region_add(MemoryListener *listener,
1734
                          MemoryRegionSection *section)
1735
{
1736
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1737

    
1738
    mrio->mr = section->mr;
1739
    mrio->offset = section->offset_within_region;
1740
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1741
                 section->offset_within_address_space,
1742
                 int128_get64(section->size));
1743
    ioport_register(&mrio->iorange);
1744
}
1745

    
1746
static void io_region_del(MemoryListener *listener,
1747
                          MemoryRegionSection *section)
1748
{
1749
    isa_unassign_ioport(section->offset_within_address_space,
1750
                        int128_get64(section->size));
1751
}
1752

    
1753
static MemoryListener core_memory_listener = {
1754
    .begin = core_begin,
1755
    .log_global_start = core_log_global_start,
1756
    .log_global_stop = core_log_global_stop,
1757
    .priority = 1,
1758
};
1759

    
1760
static MemoryListener io_memory_listener = {
1761
    .region_add = io_region_add,
1762
    .region_del = io_region_del,
1763
    .priority = 0,
1764
};
1765

    
1766
static MemoryListener tcg_memory_listener = {
1767
    .commit = tcg_commit,
1768
};

void address_space_init_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
    d->listener = (MemoryListener) {
        .begin = mem_begin,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    d->as = as;
    as->dispatch = d;
    memory_listener_register(&d->listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&d->listener);
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);

    dma_context_init(&dma_context_memory, &address_space_memory,
                     NULL, NULL, NULL);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}
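
/*
 * Minimal sketch of how board code typically consumes the accessor above;
 * the name "example.ram" and the 0 offset are illustrative, not taken from
 * this file, and the three-argument memory_region_init_ram() signature of
 * this tree is assumed: allocate a RAM-backed region and map it at guest
 * physical address 0 of the system address space.
 */
static inline void example_map_main_ram(MemoryRegion *ram, uint64_t size)
{
    memory_region_init_ram(ram, "example.ram", size);
    memory_region_add_subregion(get_system_memory(), 0, ram);
}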

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
{
    if (l >= 4 && ((addr & 3) == 0 || mr->ops->impl.unaligned)) {
        return 4;
    }
    if (l >= 2 && ((addr & 1) == 0 || mr->ops->impl.unaligned)) {
        return 2;
    }
    return 1;
}
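
/*
 * Worked example for the helper above: a 6-byte access at offset 2 of a
 * region whose ops do not allow unaligned accesses is split by the loop in
 * address_space_rw() below into a 2-byte access at offset 2 (the widest
 * naturally aligned size available there) followed by a 4-byte access at
 * offset 4.
 */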

bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l == 4) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                } else if (l == 2) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                if (l == 4) {
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                } else if (l == 2) {
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                } else {
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}

bool address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    return address_space_rw(as, addr, buf, len, false);
}
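
/*
 * Usage sketch (hypothetical device code, not part of this file): pull a
 * 16-byte descriptor out of guest memory through an address space and
 * report success; address_space_read() returns true on error.
 */
static inline bool example_read_descriptor(AddressSpace *as, hwaddr addr,
                                           uint8_t desc[16])
{
    return !address_space_read(as, addr, desc, 16);
}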
1986

    
1987

    
1988
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1989
                            int len, int is_write)
1990
{
1991
    address_space_rw(&address_space_memory, addr, buf, len, is_write);
1992
}
1993

    
1994
/* used for ROM loading : can write in RAM and ROM */
1995
void cpu_physical_memory_write_rom(hwaddr addr,
1996
                                   const uint8_t *buf, int len)
1997
{
1998
    hwaddr l;
1999
    uint8_t *ptr;
2000
    hwaddr addr1;
2001
    MemoryRegion *mr;
2002

    
2003
    while (len > 0) {
2004
        l = len;
2005
        mr = address_space_translate(&address_space_memory,
2006
                                     addr, &addr1, &l, true);
2007

    
2008
        if (!(memory_region_is_ram(mr) ||
2009
              memory_region_is_romd(mr))) {
2010
            /* do nothing */
2011
        } else {
2012
            addr1 += memory_region_get_ram_addr(mr);
2013
            /* ROM/RAM case */
2014
            ptr = qemu_get_ram_ptr(addr1);
2015
            memcpy(ptr, buf, l);
2016
            invalidate_and_set_dirty(addr1, l);
2017
        }
2018
        len -= l;
2019
        buf += l;
2020
        addr += l;
2021
    }
2022
}
2023

    
2024
typedef struct {
2025
    void *buffer;
2026
    hwaddr addr;
2027
    hwaddr len;
2028
} BounceBuffer;
2029

    
2030
static BounceBuffer bounce;
2031

    
2032
typedef struct MapClient {
2033
    void *opaque;
2034
    void (*callback)(void *opaque);
2035
    QLIST_ENTRY(MapClient) link;
2036
} MapClient;
2037

    
2038
static QLIST_HEAD(map_client_list, MapClient) map_client_list
2039
    = QLIST_HEAD_INITIALIZER(map_client_list);
2040

    
2041
void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2042
{
2043
    MapClient *client = g_malloc(sizeof(*client));
2044

    
2045
    client->opaque = opaque;
2046
    client->callback = callback;
2047
    QLIST_INSERT_HEAD(&map_client_list, client, link);
2048
    return client;
2049
}
2050

    
2051
static void cpu_unregister_map_client(void *_client)
2052
{
2053
    MapClient *client = (MapClient *)_client;
2054

    
2055
    QLIST_REMOVE(client, link);
2056
    g_free(client);
2057
}
2058

    
2059
static void cpu_notify_map_clients(void)
2060
{
2061
    MapClient *client;
2062

    
2063
    while (!QLIST_EMPTY(&map_client_list)) {
2064
        client = QLIST_FIRST(&map_client_list);
2065
        client->callback(client->opaque);
2066
        cpu_unregister_map_client(client);
2067
    }
2068
}
2069

    
2070
bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2071
{
2072
    MemoryRegion *mr;
2073
    hwaddr l, xlat;
2074

    
2075
    while (len > 0) {
2076
        l = len;
2077
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
2078
        if (!memory_access_is_direct(mr, is_write)) {
2079
            l = memory_access_size(mr, l, addr);
2080
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2081
                return false;
2082
            }
2083
        }
2084

    
2085
        len -= l;
2086
        addr += l;
2087
    }
2088
    return true;
2089
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr todo = 0;
    hwaddr l, xlat;
    MemoryRegion *mr;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);

        if (!memory_access_is_direct(mr, is_write)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                address_space_read(as, addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(mr) + xlat;
        } else {
            if (memory_region_get_ram_addr(mr) + xlat != raddr + todo) {
                break;
            }
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}
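
/*
 * Sketch of the intended map/unmap pattern documented above (hypothetical
 * caller, read direction only): map, copy out however much was actually
 * mapped, then unmap with the length that was really consumed.
 */
static inline hwaddr example_dma_read(AddressSpace *as, hwaddr addr,
                                      void *dest, hwaddr len)
{
    hwaddr mapped = len;
    void *p = address_space_map(as, addr, &mapped, false);

    if (!p) {
        return 0;
    }
    memcpy(dest, p, mapped);
    address_space_unmap(as, p, mapped, false, mapped);
    return mapped;
}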

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    address_space_unmap(&address_space_memory, buffer, len, is_write,
                        access_len);
}

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
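
/*
 * Sketch (hypothetical guest-physical descriptor address): the _le_/_be_
 * variants pin the byte order independently of TARGET_WORDS_BIGENDIAN,
 * e.g. for a little-endian flag word that must look the same on every
 * target.
 */
static inline uint32_t example_set_flag_le(hwaddr desc_addr, uint32_t flag)
{
    uint32_t old = ldl_le_phys(desc_addr);

    stl_le_phys(desc_addr, old | flag);
    return old;
}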

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif
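
/*
 * Sketch of a typical debugger-style caller (hypothetical helper, e.g. for
 * a gdbstub or monitor command): read a 32-bit value from a guest virtual
 * address, returning negative on failure like cpu_memory_rw_debug() itself.
 * The bytes are copied verbatim, i.e. *val ends up in guest memory order.
 */
static inline int example_peek_u32(CPUArchState *env, target_ulong vaddr,
                                   uint32_t *val)
{
    return cpu_memory_rw_debug(env, vaddr, (uint8_t *)val, 4, 0);
}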

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}
#endif