/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"

#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
static int in_migration;

RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

#endif

CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    uint16_t is_leaf : 1;
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};

typedef PhysPageEntry Node[L2_SIZE];

struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    Node *nodes;
    MemoryRegionSection *sections;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

typedef struct PhysPageMap {
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

static PhysPageMap *prev_map;
static PhysPageMap next_map;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

static void io_mem_init(void);
static void memory_map_init(void);
static void *qemu_safe_ram_ptr(ram_addr_t addr);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

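/* The physical page map is a multi-level radix tree whose Node entries are
 * allocated from the next_map pool.  The helpers below grow the pool and
 * fill in one level at a time; leaf entries index into the sections array.
 */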
static void phys_map_node_reserve(unsigned nodes)
{
    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
                                            16);
        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
                                      next_map.nodes_nb + nodes);
        next_map.nodes = g_renew(Node, next_map.nodes,
                                 next_map.nodes_nb_alloc);
    }
}

static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = next_map.nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != next_map.nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        next_map.nodes[ret][i].is_leaf = 0;
        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = next_map.nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = next_map.nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    int i;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }
    return &sections[lp.ptr];
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
                             d->nodes, d->sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    diff = int128_sub(section->mr->size, int128_make64(addr));
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    return section;
}

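/* Translate an address-space address into a terminal MemoryRegion plus an
 * offset inside it, walking through any intervening IOMMU regions.  *plen
 * is clamped so the returned range stays within a single section.
 */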
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    hwaddr len = *plen;

    for (;;) {
        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    *plen = len;
    *xlat = addr;
    return mr;
}

MemoryRegionSection *
address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
                                  hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
#endif
}

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu->env_ptr, 1);

    return 0;
}

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUArchState *env = first_cpu;
    CPUState *cpu = NULL;

    while (env) {
        cpu = ENV_GET_CPU(env);
        if (cpu->cpu_index == index) {
            break;
        }
        env = env->next_cpu;
    }

    return env ? cpu : NULL;
}

void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
{
    CPUArchState *env = first_cpu;

    while (env) {
        func(ENV_GET_CPU(env), data);
        env = env->next_cpu;
    }
}

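/* Register a newly created CPU: append it to the global CPU list, assign
 * its cpu_index, and hook up the vmstate handlers needed for save/restore.
 */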
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);

    breakpoint_invalidate(env, pc);

    if (breakpoint)
        *breakpoint = bp;
    return 0;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(env, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(env, breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUArchState *env, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (env->singlestep_enabled != enabled) {
        env->singlestep_enabled = enabled;
        if (kvm_enabled())
            kvm_update_guest_debug(env, 0);
        else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(env);
        }
    }
#endif
}

void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    CPUState *cpu = ENV_GET_CPU(env);
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

CPUArchState *cpu_copy(CPUArchState *env)
{
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
    CPUArchState *next_cpu = new_env->next_cpu;
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;
    CPUWatchpoint *wp;
#endif

    memcpy(new_env, env, sizeof(CPUArchState));

    /* Preserve chaining. */
    new_env->next_cpu = next_cpu;

    /* Clone all break/watchpoints.
       Note: Once we support ptrace with hw-debug register access, make sure
       BP_CPU break/watchpoints are handled correctly on clone. */
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#if defined(TARGET_HAS_ICE)
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
    }
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
                              wp->flags, NULL);
    }
#endif

    return new_env;
}

#if !defined(CONFIG_USER_ONLY)
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                      uintptr_t length)
{
    uintptr_t start1;

    /* we modify the TLB cache so that the dirty bit will be set again
       when accessing the range */
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
    /* Check that we don't span multiple blocks - this breaks the
       address comparisons below.  */
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
            != (end - 1) - start) {
        abort();
    }
    cpu_tlb_reset_dirty_all(start1, length);
}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                     int dirty_flags)
{
    uintptr_t length;

    start &= TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    length = end - start;
    if (length == 0)
        return;
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, end, length);
    }
}

static int cpu_physical_memory_set_dirty_tracking(int enable)
{
    int ret = 0;
    in_migration = enable;
    return ret;
}

716
                                       MemoryRegionSection *section,
717
                                       target_ulong vaddr,
718
                                       hwaddr paddr, hwaddr xlat,
719
                                       int prot,
720
                                       target_ulong *address)
721
{
722
    hwaddr iotlb;
723
    CPUWatchpoint *wp;
724

    
725
    if (memory_region_is_ram(section->mr)) {
726
        /* Normal RAM.  */
727
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
728
            + xlat;
729
        if (!section->readonly) {
730
            iotlb |= PHYS_SECTION_NOTDIRTY;
731
        } else {
732
            iotlb |= PHYS_SECTION_ROM;
733
        }
734
    } else {
735
        iotlb = section - address_space_memory.dispatch->sections;
736
        iotlb += xlat;
737
    }
738

    
739
    /* Make accesses to pages with watchpoints go via the
740
       watchpoint trap routines.  */
741
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
742
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
743
            /* Avoid trapping reads of pages with a write breakpoint. */
744
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
745
                iotlb = PHYS_SECTION_WATCH + paddr;
746
                *address |= TLB_MMIO;
747
                break;
748
            }
749
        }
750
    }
751

    
752
    return iotlb;
753
}
754
#endif /* defined(CONFIG_USER_ONLY) */
755

    
756
#if !defined(CONFIG_USER_ONLY)
757

    
758
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
759
                             uint16_t section);
760
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
761

    
762
static uint16_t phys_section_add(MemoryRegionSection *section)
763
{
764
    /* The physical section number is ORed with a page-aligned
765
     * pointer to produce the iotlb entries.  Thus it should
766
     * never overflow into the page-aligned value.
767
     */
768
    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
769

    
770
    if (next_map.sections_nb == next_map.sections_nb_alloc) {
771
        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
772
                                         16);
773
        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
774
                                    next_map.sections_nb_alloc);
775
    }
776
    next_map.sections[next_map.sections_nb] = *section;
777
    memory_region_ref(section->mr);
778
    return next_map.sections_nb++;
779
}
780

    
781
static void phys_section_destroy(MemoryRegion *mr)
782
{
783
    memory_region_unref(mr);
784

    
785
    if (mr->subpage) {
786
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
787
        memory_region_destroy(&subpage->iomem);
788
        g_free(subpage);
789
    }
790
}
791

    
792
static void phys_sections_free(PhysPageMap *map)
793
{
794
    while (map->sections_nb > 0) {
795
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
796
        phys_section_destroy(section->mr);
797
    }
798
    g_free(map->sections);
799
    g_free(map->nodes);
800
    g_free(map);
801
}
802

    
803
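/* Sections smaller than a target page are dispatched through a subpage_t
 * container: install (or reuse) the container for the page and record which
 * section backs each byte range inside it.
 */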
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
                                                   next_map.nodes, next_map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
}


static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

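/* Listener callback for region additions: split the section into an
 * unaligned head, whole pages in the middle, and an unaligned tail, and
 * register each piece either as a subpage or as a multipage mapping.
 */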
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#if defined(__linux__) && !defined(TARGET_S390X)

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

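/* Back a RAMBlock with a file on a hugetlbfs mount (-mem-path).  Returns the
 * mmap'ed area, or NULL so the caller can fall back to anonymous RAM.
 */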
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
#ifdef MAP_POPULATE
    int flags;
#endif
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(block->mr->name);
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
        return NULL;
    }
    unlink(filename);
    g_free(filename);

    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

#ifdef MAP_POPULATE
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
#else
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#endif
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return (NULL);
    }
    block->fd = fd;
    return area;
}
#endif

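/* Pick a ram_addr_t offset for a new block by searching the existing blocks
 * for the smallest gap that still fits the requested size.
 */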
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QTAILQ_EMPTY(&ram_list.blocks))
        return 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
        last = MAX(last, block->offset + block->length);

    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!qemu_opt_get_bool(qemu_get_machine_opts(),
                           "dump-guest-core", true)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    new_block = NULL;
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            new_block = block;
            break;
        }
    }
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    qemu_mutex_unlock_ramlist();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

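/* Allocate guest RAM of the given size, backed either by a caller-supplied
 * host pointer, by -mem-path, by Xen/KVM-specific allocators, or by
 * anonymous memory, and insert the resulting block into ram_list.
 */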
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *block, *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else {
        if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
            new_block->host = file_ram_alloc(new_block, size, mem_path);
            if (!new_block->host) {
                new_block->host = qemu_anon_ram_alloc(size);
                memory_try_enable_merging(new_block->host, size);
            }
#else
            fprintf(stderr, "-mem-path option unsupported\n");
            exit(1);
#endif
        } else {
            if (xen_enabled()) {
                xen_ram_alloc(new_block->offset, size, mr);
            } else if (kvm_enabled()) {
                /* some s390/kvm configurations have special constraints */
                new_block->host = kvm_ram_alloc(size);
            } else {
                new_block->host = qemu_anon_ram_alloc(size);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                       last_ram_offset() >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
           0, size >> TARGET_PAGE_BITS);
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);

    qemu_ram_setup_dump(new_block->host, size);
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
                if (block->fd) {
                    munmap(block->host, block->length);
                    close(block->fd);
                } else {
                    qemu_anon_ram_free(block->host, block->length);
                }
#else
                abort();
#endif
            } else {
                if (xen_enabled()) {
                    xen_invalidate_map_cache_entry(block->host);
                } else {
                    qemu_anon_ram_free(block->host, block->length);
                }
            }
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (mem_path) {
#if defined(__linux__) && !defined(TARGET_S390X)
                    if (block->fd) {
#ifdef MAP_POPULATE
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                            MAP_PRIVATE;
#else
                        flags |= MAP_PRIVATE;
#endif
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, block->fd, offset);
                    } else {
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, -1, 0);
                    }
#else
                    abort();
#endif
                } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
                                flags, -1, 0);
#else
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
#endif
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
#endif /* !_WIN32 */

static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    /* The list is protected by the iothread lock here.  */
    block = ram_list.mru_block;
    if (block && addr - block->offset < block->length) {
        goto found;
    }
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    ram_list.mru_block = block;
    return block;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block = qemu_get_ram_block(addr);

    if (xen_enabled()) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        } else if (block->host == NULL) {
            block->host =
                xen_map_cache(block->offset, block->length, 1);
        }
    }
    return block->host + (addr - block->offset);
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
 *
 * ??? Is this still necessary?
 */
static void *qemu_safe_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* The list is protected by the iothread lock here.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument */
static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
{
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;

        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->length) {
                if (addr - block->offset + *size > block->length)
                    *size = block->length - addr + block->offset;
                return block->host + (addr - block->offset);
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return qemu_get_ram_block(*ram_addr)->mr;
    }

    block = ram_list.mru_block;
    if (block && block->host && host - block->host < block->length) {
        goto found;
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        /* This can happen when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            goto found;
        }
    }

    return NULL;

found:
    *ram_addr = block->offset + (host - block->host);
    return block->mr;
}

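/* Writes to pages with dirty tracking active are routed through this
 * "notdirty" region: translated code on the page is invalidated, the store
 * is performed on the underlying RAM, and the dirty flags are updated.
 */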
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    int dirty_flags;
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (dirty_flags == 0xff)
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUArchState *env = cpu_single_env;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb_check_watchpoint(env);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(addr);
    case 2: return lduw_phys(addr);
    case 4: return ldl_phys(addr);
    default: abort();
    }
}

static void watch_mem_write(void *opaque, hwaddr addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(addr, val);
        break;
    case 2:
        stw_phys(addr, val);
        break;
    case 4:
        stl_phys(addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

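/* Subpage accessors: forward the access to the owning address space at
 * subpage->base + addr, so each sub-page range hits the section that was
 * registered for it.
 */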
static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    address_space_read(subpage->as, addr + subpage->base, buf, len);
    switch (len) {
    case 1:
        return ldub_p(buf);
    case 2:
        return lduw_p(buf);
    case 4:
        return ldl_p(buf);
    default:
        abort();
    }
}

static void subpage_write(void *opaque, hwaddr addr,
                          uint64_t value, unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    default:
        abort();
    }
    address_space_write(subpage->as, addr + subpage->base, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned size, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      size, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
           mmio, start, end, idx, eidx, memory);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}

static uint16_t dummy_section(MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(&section);
}

MemoryRegion *iotlb_to_region(hwaddr index)
{
    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
1703
{
1704
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1705
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1706

    
1707
    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1708
    d->as = as;
1709
    as->next_dispatch = d;
1710
}
1711

    
1712
static void mem_commit(MemoryListener *listener)
1713
{
1714
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1715
    AddressSpaceDispatch *cur = as->dispatch;
1716
    AddressSpaceDispatch *next = as->next_dispatch;
1717

    
1718
    next->nodes = next_map.nodes;
1719
    next->sections = next_map.sections;
1720

    
1721
    as->dispatch = next;
1722
    g_free(cur);
1723
}
1724

    
1725
static void core_begin(MemoryListener *listener)
1726
{
1727
    uint16_t n;
1728

    
1729
    prev_map = g_new(PhysPageMap, 1);
1730
    *prev_map = next_map;
1731

    
1732
    memset(&next_map, 0, sizeof(next_map));
1733
    n = dummy_section(&io_mem_unassigned);
1734
    assert(n == PHYS_SECTION_UNASSIGNED);
1735
    n = dummy_section(&io_mem_notdirty);
1736
    assert(n == PHYS_SECTION_NOTDIRTY);
1737
    n = dummy_section(&io_mem_rom);
1738
    assert(n == PHYS_SECTION_ROM);
1739
    n = dummy_section(&io_mem_watch);
1740
    assert(n == PHYS_SECTION_WATCH);
1741
}
1742

    
/* This listener's commit runs after the other AddressSpaceDispatch listeners'.
 * All AddressSpaceDispatch instances have switched to the next map.
 */
static void core_commit(MemoryListener *listener)
{
    phys_sections_free(prev_map);
}

    
static void tcg_commit(MemoryListener *listener)
{
    CPUArchState *env;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&as->dispatch_listener);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, NULL, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, NULL, "io", 65536);
    address_space_init(&address_space_io, system_io, "I/O");

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
{
    if (l >= 4 && (((addr & 3) == 0 || mr->ops->impl.unaligned))) {
        return 4;
    }
    if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
        return 2;
    }
    return 1;
}

bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l == 4) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                } else if (l == 2) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                if (l == 4) {
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                } else if (l == 2) {
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                } else {
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}

bool address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    return address_space_rw(as, addr, buf, len, false);
}
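
/* Usage sketch for the helpers above (illustrative only; the address and
 * buffer are made up):
 *
 *     uint8_t buf[4];
 *     if (address_space_read(&address_space_memory, 0x1000, buf, sizeof(buf))) {
 *         // part of the range was not readable (these return true on error)
 *     }
 *     address_space_write(&address_space_memory, 0x1000, buf, sizeof(buf));
 *
 * Both wrappers funnel into address_space_rw(), which splits the request at
 * MemoryRegion boundaries and falls back to io_mem_read()/io_mem_write() for
 * anything that is not directly accessible RAM.
 */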

    
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    while (len > 0) {
        l = len;
        mr = address_space_translate(&address_space_memory,
                                     addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
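
/* Usage sketch (illustrative; the address and data are made up):
 *
 *     static const uint8_t boot_code[] = { 0xde, 0xad, 0xbe, 0xef };
 *     cpu_physical_memory_write_rom(0xfffc0000, boot_code, sizeof(boot_code));
 *
 * Unlike cpu_physical_memory_write(), this path also patches regions that are
 * mapped read-only (ROM/ROMD), which is what firmware loaders need.
 */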

    
typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}
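
/* Sketch of the retry protocol built on the map-client list above.  The
 * callback, its opaque argument and the mapping request are made up; as, addr
 * and plen stand for the caller's own values:
 *
 *     static void my_dma_retry(void *opaque)
 *     {
 *         // called once the bounce buffer is free again; retry the failed
 *         // address_space_map() from here
 *     }
 *
 *     if (!address_space_map(as, addr, &plen, is_write)) {
 *         cpu_register_map_client(my_dma_state, my_dma_retry);
 *     }
 *
 * cpu_notify_map_clients() runs and unregisters every pending client when
 * address_space_unmap() releases the single global bounce buffer.
 */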

    
bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}
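
/* Usage sketch (illustrative; the address and length are made up):
 *
 *     if (!address_space_access_valid(&address_space_memory, 0x1000, 4, false)) {
 *         // at least part of [0x1000, 0x1004) has nothing readable behind it
 *     }
 *
 * This is only a hint: the memory map can change between this check and a
 * later address_space_rw().
 */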

    
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (bounce.buffer) {
            return NULL;
        }
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    cpu_notify_map_clients();
}
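
/* Typical zero-copy pattern built on the two functions above (illustrative;
 * the address, length and direction are made up):
 *
 *     hwaddr plen = 4096;
 *     void *p = address_space_map(as, 0x40000000, &plen, true);
 *     if (p) {
 *         // fill up to plen bytes at p, then:
 *         address_space_unmap(as, p, plen, true, plen);
 *     } else {
 *         // resources exhausted; see cpu_register_map_client() above
 *     }
 *
 * plen may come back smaller than requested, so callers must be prepared to
 * loop or to fall back to cpu_physical_memory_rw().
 */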

    
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
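
/* Example of how the endian-specific loads above differ (illustrative; the
 * address is made up).  For a RAM location holding the bytes
 * 0x12 0x34 0x56 0x78 in ascending address order:
 *
 *     uint32_t le = ldl_le_phys(0x1000);   // 0x78563412
 *     uint32_t be = ldl_be_phys(0x1000);   // 0x12345678
 *     uint32_t na = ldl_phys(0x1000);      // whichever matches the target
 *
 * The same pattern holds for the 16-bit and 64-bit variants below.
 */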

    
/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

    
/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}
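
/* Usage sketch (illustrative; pte_addr, pte and the flag name stand in for
 * the caller's own values): target MMU code that updates accessed/dirty bits
 * in a guest page table entry can use
 *
 *     stl_phys_notdirty(pte_addr, pte | PG_DIRTY_MASK);
 *
 * so that rewriting the PTE does not itself mark the page dirty or invalidate
 * translated code, which would defeat dirty-bit-based PTE tracking.
 */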

    
/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
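
/* Usage sketch (illustrative; env, guest_pc and the buffer are made up).
 * This is the accessor that gdbstub-style debug paths are expected to go
 * through, since it walks the guest page tables itself and can patch ROM
 * (e.g. when planting breakpoints):
 *
 *     uint8_t insn[4];
 *     if (cpu_memory_rw_debug(env, guest_pc, insn, sizeof(insn), 0) < 0) {
 *         // the virtual address is not mapped
 *     }
 */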
#endif

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}

void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->length, opaque);
    }
}
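
/* Callback sketch (illustrative; the function name and use are made up,
 * assuming RAMBlockIterFunc has the signature implied by the call above):
 *
 *     static void count_ram(void *host, ram_addr_t offset, ram_addr_t length,
 *                           void *opaque)
 *     {
 *         *(uint64_t *)opaque += length;
 *     }
 *
 *     uint64_t total = 0;
 *     qemu_ram_foreach_block(count_ram, &total);
 *
 * Each registered RAM block is visited once with its host mapping, ram_addr_t
 * offset and length.
 */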
#endif