root / exec.c @ ef36fa14

1
/*
2
 *  Virtual page mapping
3
 *
4
 *  Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include "config.h"
20
#ifdef _WIN32
21
#include <windows.h>
22
#else
23
#include <sys/types.h>
24
#include <sys/mman.h>
25
#endif
26

    
27
#include "qemu-common.h"
28
#include "cpu.h"
29
#include "tcg.h"
30
#include "hw/hw.h"
31
#include "hw/qdev.h"
32
#include "qemu/osdep.h"
33
#include "sysemu/kvm.h"
34
#include "sysemu/sysemu.h"
35
#include "hw/xen/xen.h"
36
#include "qemu/timer.h"
37
#include "qemu/config-file.h"
38
#include "exec/memory.h"
39
#include "sysemu/dma.h"
40
#include "exec/address-spaces.h"
41
#if defined(CONFIG_USER_ONLY)
42
#include <qemu.h>
43
#else /* !CONFIG_USER_ONLY */
44
#include "sysemu/xen-mapcache.h"
45
#include "trace.h"
46
#endif
47
#include "exec/cpu-all.h"
48

    
49
#include "exec/cputlb.h"
50
#include "translate-all.h"
51

    
52
#include "exec/memory-internal.h"
53

    
54
//#define DEBUG_SUBPAGE
55

    
56
#if !defined(CONFIG_USER_ONLY)
57
static int in_migration;
58

    
59
RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60

    
61
static MemoryRegion *system_memory;
62
static MemoryRegion *system_io;
63

    
64
AddressSpace address_space_io;
65
AddressSpace address_space_memory;
66

    
67
MemoryRegion io_mem_rom, io_mem_notdirty;
68
static MemoryRegion io_mem_unassigned;
69

    
70
#endif
71

    
72
struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
73
/* current CPU in the current thread. It is only valid inside
74
   cpu_exec() */
75
DEFINE_TLS(CPUState *, current_cpu);
76
/* 0 = Do not count executed instructions.
77
   1 = Precise instruction counting.
78
   2 = Adaptive rate instruction counting.  */
79
int use_icount;
80

    
81
#if !defined(CONFIG_USER_ONLY)
82

    
83
typedef struct PhysPageEntry PhysPageEntry;
84

    
85
struct PhysPageEntry {
86
    uint16_t is_leaf : 1;
87
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
88
    uint16_t ptr : 15;
89
};
90

    
91
typedef PhysPageEntry Node[L2_SIZE];
92

    
93
struct AddressSpaceDispatch {
94
    /* This is a multi-level map on the physical address space.
95
     * The bottom level has pointers to MemoryRegionSections.
96
     */
97
    PhysPageEntry phys_map;
98
    Node *nodes;
99
    MemoryRegionSection *sections;
100
    AddressSpace *as;
101
};
102

    
103
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
104
typedef struct subpage_t {
105
    MemoryRegion iomem;
106
    AddressSpace *as;
107
    hwaddr base;
108
    uint16_t sub_section[TARGET_PAGE_SIZE];
109
} subpage_t;
110

    
111
#define PHYS_SECTION_UNASSIGNED 0
112
#define PHYS_SECTION_NOTDIRTY 1
113
#define PHYS_SECTION_ROM 2
114
#define PHYS_SECTION_WATCH 3
115

    
116
typedef struct PhysPageMap {
117
    unsigned sections_nb;
118
    unsigned sections_nb_alloc;
119
    unsigned nodes_nb;
120
    unsigned nodes_nb_alloc;
121
    Node *nodes;
122
    MemoryRegionSection *sections;
123
} PhysPageMap;
124

    
125
static PhysPageMap *prev_map;
126
static PhysPageMap next_map;
127

    
128
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
129

    
130
static void io_mem_init(void);
131
static void memory_map_init(void);
132

    
133
static MemoryRegion io_mem_watch;
134
#endif
135

    
136
#if !defined(CONFIG_USER_ONLY)
137

    
138
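/* Reserve room in next_map for at least @nodes more Node entries,
 * growing the backing array geometrically when needed.  */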
static void phys_map_node_reserve(unsigned nodes)
139
{
140
    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
141
        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
142
                                            16);
143
        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
144
                                      next_map.nodes_nb + nodes);
145
        next_map.nodes = g_renew(Node, next_map.nodes,
146
                                 next_map.nodes_nb_alloc);
147
    }
148
}
149

    
150
static uint16_t phys_map_node_alloc(void)
151
{
152
    unsigned i;
153
    uint16_t ret;
154

    
155
    ret = next_map.nodes_nb++;
156
    assert(ret != PHYS_MAP_NODE_NIL);
157
    assert(ret != next_map.nodes_nb_alloc);
158
    for (i = 0; i < L2_SIZE; ++i) {
159
        next_map.nodes[ret][i].is_leaf = 0;
160
        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
161
    }
162
    return ret;
163
}
164

    
165
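/* Populate one level of the physical page map: allocate the node on demand
 * (a new bottom-level node starts out with all leaves unassigned), then mark
 * each step-aligned chunk of [*index, *index + *nb) as a leaf pointing at
 * @leaf, recursing into the next level for partial chunks.  */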
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
166
                                hwaddr *nb, uint16_t leaf,
167
                                int level)
168
{
169
    PhysPageEntry *p;
170
    int i;
171
    hwaddr step = (hwaddr)1 << (level * L2_BITS);
172

    
173
    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
174
        lp->ptr = phys_map_node_alloc();
175
        p = next_map.nodes[lp->ptr];
176
        if (level == 0) {
177
            for (i = 0; i < L2_SIZE; i++) {
178
                p[i].is_leaf = 1;
179
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
180
            }
181
        }
182
    } else {
183
        p = next_map.nodes[lp->ptr];
184
    }
185
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
186

    
187
    while (*nb && lp < &p[L2_SIZE]) {
188
        if ((*index & (step - 1)) == 0 && *nb >= step) {
189
            lp->is_leaf = true;
190
            lp->ptr = leaf;
191
            *index += step;
192
            *nb -= step;
193
        } else {
194
            phys_page_set_level(lp, index, nb, leaf, level - 1);
195
        }
196
        ++lp;
197
    }
198
}
199

    
200
static void phys_page_set(AddressSpaceDispatch *d,
201
                          hwaddr index, hwaddr nb,
202
                          uint16_t leaf)
203
{
204
    /* Wildly overreserve - it doesn't matter much. */
205
    phys_map_node_reserve(3 * P_L2_LEVELS);
206

    
207
    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
208
}
209

    
210
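/* Walk the map from @lp down to a leaf and return the MemoryRegionSection
 * covering page @index; unmapped ranges resolve to the unassigned section.  */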
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
211
                                           Node *nodes, MemoryRegionSection *sections)
212
{
213
    PhysPageEntry *p;
214
    int i;
215

    
216
    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
217
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
218
            return &sections[PHYS_SECTION_UNASSIGNED];
219
        }
220
        p = nodes[lp.ptr];
221
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
222
    }
223
    return &sections[lp.ptr];
224
}
225

    
226
bool memory_region_is_unassigned(MemoryRegion *mr)
227
{
228
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
229
        && mr != &io_mem_watch;
230
}
231

    
232
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
233
                                                        hwaddr addr,
234
                                                        bool resolve_subpage)
235
{
236
    MemoryRegionSection *section;
237
    subpage_t *subpage;
238

    
239
    section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
240
                             d->nodes, d->sections);
241
    if (resolve_subpage && section->mr->subpage) {
242
        subpage = container_of(section->mr, subpage_t, iomem);
243
        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
244
    }
245
    return section;
246
}
247

    
248
static MemoryRegionSection *
249
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
250
                                 hwaddr *plen, bool resolve_subpage)
251
{
252
    MemoryRegionSection *section;
253
    Int128 diff;
254

    
255
    section = address_space_lookup_region(d, addr, resolve_subpage);
256
    /* Compute offset within MemoryRegionSection */
257
    addr -= section->offset_within_address_space;
258

    
259
    /* Compute offset within MemoryRegion */
260
    *xlat = addr + section->offset_within_region;
261

    
262
    diff = int128_sub(section->mr->size, int128_make64(addr));
263
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
264
    return section;
265
}
266

    
267
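/* Translate @addr within @as, following IOMMU regions into their target
 * address spaces until a terminal MemoryRegion is reached.  On return, *xlat
 * is the offset into that region and *plen is clamped to the length the
 * translation covers; accesses that the IOMMU forbids resolve to the
 * unassigned region.  */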
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
268
                                      hwaddr *xlat, hwaddr *plen,
269
                                      bool is_write)
270
{
271
    IOMMUTLBEntry iotlb;
272
    MemoryRegionSection *section;
273
    MemoryRegion *mr;
274
    hwaddr len = *plen;
275

    
276
    for (;;) {
277
        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
278
        mr = section->mr;
279

    
280
        if (!mr->iommu_ops) {
281
            break;
282
        }
283

    
284
        iotlb = mr->iommu_ops->translate(mr, addr);
285
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
286
                | (addr & iotlb.addr_mask));
287
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
288
        if (!(iotlb.perm & (1 << is_write))) {
289
            mr = &io_mem_unassigned;
290
            break;
291
        }
292

    
293
        as = iotlb.target_as;
294
    }
295

    
296
    *plen = len;
297
    *xlat = addr;
298
    return mr;
299
}
300

    
301
MemoryRegionSection *
302
address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
303
                                  hwaddr *plen)
304
{
305
    MemoryRegionSection *section;
306
    section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
307

    
308
    assert(!section->mr->iommu_ops);
309
    return section;
310
}
311
#endif
312

    
313
void cpu_exec_init_all(void)
314
{
315
#if !defined(CONFIG_USER_ONLY)
316
    qemu_mutex_init(&ram_list.mutex);
317
    memory_map_init();
318
    io_mem_init();
319
#endif
320
}
321

    
322
#if !defined(CONFIG_USER_ONLY)
323

    
324
static int cpu_common_post_load(void *opaque, int version_id)
325
{
326
    CPUState *cpu = opaque;
327

    
328
    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
329
       version_id is increased. */
330
    cpu->interrupt_request &= ~0x01;
331
    tlb_flush(cpu->env_ptr, 1);
332

    
333
    return 0;
334
}
335

    
336
const VMStateDescription vmstate_cpu_common = {
337
    .name = "cpu_common",
338
    .version_id = 1,
339
    .minimum_version_id = 1,
340
    .minimum_version_id_old = 1,
341
    .post_load = cpu_common_post_load,
342
    .fields      = (VMStateField []) {
343
        VMSTATE_UINT32(halted, CPUState),
344
        VMSTATE_UINT32(interrupt_request, CPUState),
345
        VMSTATE_END_OF_LIST()
346
    }
347
};
348

    
349
#endif
350

    
351
CPUState *qemu_get_cpu(int index)
352
{
353
    CPUState *cpu;
354

    
355
    CPU_FOREACH(cpu) {
356
        if (cpu->cpu_index == index) {
357
            return cpu;
358
        }
359
    }
360

    
361
    return NULL;
362
}
363

    
364
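/* Register a freshly created CPU: assign the next free cpu_index, initialize
 * its breakpoint and watchpoint lists, add it to the global CPU list and,
 * where applicable, register its vmstate for migration.  */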
void cpu_exec_init(CPUArchState *env)
365
{
366
    CPUState *cpu = ENV_GET_CPU(env);
367
    CPUClass *cc = CPU_GET_CLASS(cpu);
368
    CPUState *some_cpu;
369
    int cpu_index;
370

    
371
#if defined(CONFIG_USER_ONLY)
372
    cpu_list_lock();
373
#endif
374
    cpu_index = 0;
375
    CPU_FOREACH(some_cpu) {
376
        cpu_index++;
377
    }
378
    cpu->cpu_index = cpu_index;
379
    cpu->numa_node = 0;
380
    QTAILQ_INIT(&env->breakpoints);
381
    QTAILQ_INIT(&env->watchpoints);
382
#ifndef CONFIG_USER_ONLY
383
    cpu->thread_id = qemu_get_thread_id();
384
#endif
385
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
386
#if defined(CONFIG_USER_ONLY)
387
    cpu_list_unlock();
388
#endif
389
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
390
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
391
    }
392
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
393
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
394
                    cpu_save, cpu_load, env);
395
    assert(cc->vmsd == NULL);
396
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
397
#endif
398
    if (cc->vmsd != NULL) {
399
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
400
    }
401
}
402

    
403
#if defined(TARGET_HAS_ICE)
404
#if defined(CONFIG_USER_ONLY)
405
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
406
{
407
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
408
}
409
#else
410
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
411
{
412
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
413
    if (phys != -1) {
414
        tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
415
    }
416
}
417
#endif
418
#endif /* TARGET_HAS_ICE */
419

    
420
#if defined(CONFIG_USER_ONLY)
421
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
422

    
423
{
424
}
425

    
426
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
427
                          int flags, CPUWatchpoint **watchpoint)
428
{
429
    return -ENOSYS;
430
}
431
#else
432
/* Add a watchpoint.  */
433
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
434
                          int flags, CPUWatchpoint **watchpoint)
435
{
436
    target_ulong len_mask = ~(len - 1);
437
    CPUWatchpoint *wp;
438

    
439
    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
440
    if ((len & (len - 1)) || (addr & ~len_mask) ||
441
            len == 0 || len > TARGET_PAGE_SIZE) {
442
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
443
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
444
        return -EINVAL;
445
    }
446
    wp = g_malloc(sizeof(*wp));
447

    
448
    wp->vaddr = addr;
449
    wp->len_mask = len_mask;
450
    wp->flags = flags;
451

    
452
    /* keep all GDB-injected watchpoints in front */
453
    if (flags & BP_GDB)
454
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
455
    else
456
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
457

    
458
    tlb_flush_page(env, addr);
459

    
460
    if (watchpoint)
461
        *watchpoint = wp;
462
    return 0;
463
}
464

    
465
/* Remove a specific watchpoint.  */
466
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
467
                          int flags)
468
{
469
    target_ulong len_mask = ~(len - 1);
470
    CPUWatchpoint *wp;
471

    
472
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
473
        if (addr == wp->vaddr && len_mask == wp->len_mask
474
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
475
            cpu_watchpoint_remove_by_ref(env, wp);
476
            return 0;
477
        }
478
    }
479
    return -ENOENT;
480
}
481

    
482
/* Remove a specific watchpoint by reference.  */
483
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
484
{
485
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
486

    
487
    tlb_flush_page(env, watchpoint->vaddr);
488

    
489
    g_free(watchpoint);
490
}
491

    
492
/* Remove all matching watchpoints.  */
493
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
494
{
495
    CPUWatchpoint *wp, *next;
496

    
497
    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
498
        if (wp->flags & mask)
499
            cpu_watchpoint_remove_by_ref(env, wp);
500
    }
501
}
502
#endif
503

    
504
/* Add a breakpoint.  */
505
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
506
                          CPUBreakpoint **breakpoint)
507
{
508
#if defined(TARGET_HAS_ICE)
509
    CPUBreakpoint *bp;
510

    
511
    bp = g_malloc(sizeof(*bp));
512

    
513
    bp->pc = pc;
514
    bp->flags = flags;
515

    
516
    /* keep all GDB-injected breakpoints in front */
517
    if (flags & BP_GDB) {
518
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
519
    } else {
520
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
521
    }
522

    
523
    breakpoint_invalidate(ENV_GET_CPU(env), pc);
524

    
525
    if (breakpoint) {
526
        *breakpoint = bp;
527
    }
528
    return 0;
529
#else
530
    return -ENOSYS;
531
#endif
532
}
533

    
534
/* Remove a specific breakpoint.  */
535
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
536
{
537
#if defined(TARGET_HAS_ICE)
538
    CPUBreakpoint *bp;
539

    
540
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
541
        if (bp->pc == pc && bp->flags == flags) {
542
            cpu_breakpoint_remove_by_ref(env, bp);
543
            return 0;
544
        }
545
    }
546
    return -ENOENT;
547
#else
548
    return -ENOSYS;
549
#endif
550
}
551

    
552
/* Remove a specific breakpoint by reference.  */
553
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
554
{
555
#if defined(TARGET_HAS_ICE)
556
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
557

    
558
    breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
559

    
560
    g_free(breakpoint);
561
#endif
562
}
563

    
564
/* Remove all matching breakpoints. */
565
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
566
{
567
#if defined(TARGET_HAS_ICE)
568
    CPUBreakpoint *bp, *next;
569

    
570
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
571
        if (bp->flags & mask)
572
            cpu_breakpoint_remove_by_ref(env, bp);
573
    }
574
#endif
575
}
576

    
577
/* enable or disable single step mode. EXCP_DEBUG is returned by the
578
   CPU loop after each instruction */
579
void cpu_single_step(CPUState *cpu, int enabled)
580
{
581
#if defined(TARGET_HAS_ICE)
582
    if (cpu->singlestep_enabled != enabled) {
583
        cpu->singlestep_enabled = enabled;
584
        if (kvm_enabled()) {
585
            kvm_update_guest_debug(cpu, 0);
586
        } else {
587
            /* must flush all the translated code to avoid inconsistencies */
588
            /* XXX: only flush what is necessary */
589
            CPUArchState *env = cpu->env_ptr;
590
            tb_flush(env);
591
        }
592
    }
593
#endif
594
}
595

    
596
void cpu_abort(CPUArchState *env, const char *fmt, ...)
597
{
598
    CPUState *cpu = ENV_GET_CPU(env);
599
    va_list ap;
600
    va_list ap2;
601

    
602
    va_start(ap, fmt);
603
    va_copy(ap2, ap);
604
    fprintf(stderr, "qemu: fatal: ");
605
    vfprintf(stderr, fmt, ap);
606
    fprintf(stderr, "\n");
607
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
608
    if (qemu_log_enabled()) {
609
        qemu_log("qemu: fatal: ");
610
        qemu_log_vprintf(fmt, ap2);
611
        qemu_log("\n");
612
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
613
        qemu_log_flush();
614
        qemu_log_close();
615
    }
616
    va_end(ap2);
617
    va_end(ap);
618
#if defined(CONFIG_USER_ONLY)
619
    {
620
        struct sigaction act;
621
        sigfillset(&act.sa_mask);
622
        act.sa_handler = SIG_DFL;
623
        sigaction(SIGABRT, &act, NULL);
624
    }
625
#endif
626
    abort();
627
}
628

    
629
#if !defined(CONFIG_USER_ONLY)
630
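/* Return the RAMBlock containing ram address @addr, trying the
 * most-recently-used block first and caching the result; aborts if the
 * offset belongs to no block.  */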
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
631
{
632
    RAMBlock *block;
633

    
634
    /* The list is protected by the iothread lock here.  */
635
    block = ram_list.mru_block;
636
    if (block && addr - block->offset < block->length) {
637
        goto found;
638
    }
639
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
640
        if (addr - block->offset < block->length) {
641
            goto found;
642
        }
643
    }
644

    
645
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
646
    abort();
647

    
648
found:
649
    ram_list.mru_block = block;
650
    return block;
651
}
652

    
653
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
654
                                      uintptr_t length)
655
{
656
    RAMBlock *block;
657
    ram_addr_t start1;
658

    
659
    block = qemu_get_ram_block(start);
660
    assert(block == qemu_get_ram_block(end - 1));
661
    start1 = (uintptr_t)block->host + (start - block->offset);
662
    cpu_tlb_reset_dirty_all(start1, length);
663
}
664

    
665
/* Note: start and end must be within the same ram block.  */
666
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
667
                                     int dirty_flags)
668
{
669
    uintptr_t length;
670

    
671
    start &= TARGET_PAGE_MASK;
672
    end = TARGET_PAGE_ALIGN(end);
673

    
674
    length = end - start;
675
    if (length == 0)
676
        return;
677
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
678

    
679
    if (tcg_enabled()) {
680
        tlb_reset_dirty_range_all(start, end, length);
681
    }
682
}
683

    
684
static int cpu_physical_memory_set_dirty_tracking(int enable)
685
{
686
    int ret = 0;
687
    in_migration = enable;
688
    return ret;
689
}
690

    
691
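/* Compute the iotlb value for a TLB entry mapping @vaddr: a ram address
 * (tagged NOTDIRTY or ROM) for RAM-backed sections, or a section index for
 * MMIO.  Pages with an armed watchpoint are redirected to the watch section
 * and marked TLB_MMIO so that accesses take the slow path.  */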
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
692
                                       MemoryRegionSection *section,
693
                                       target_ulong vaddr,
694
                                       hwaddr paddr, hwaddr xlat,
695
                                       int prot,
696
                                       target_ulong *address)
697
{
698
    hwaddr iotlb;
699
    CPUWatchpoint *wp;
700

    
701
    if (memory_region_is_ram(section->mr)) {
702
        /* Normal RAM.  */
703
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
704
            + xlat;
705
        if (!section->readonly) {
706
            iotlb |= PHYS_SECTION_NOTDIRTY;
707
        } else {
708
            iotlb |= PHYS_SECTION_ROM;
709
        }
710
    } else {
711
        iotlb = section - address_space_memory.dispatch->sections;
712
        iotlb += xlat;
713
    }
714

    
715
    /* Make accesses to pages with watchpoints go via the
716
       watchpoint trap routines.  */
717
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
718
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
719
            /* Avoid trapping reads of pages with a write breakpoint. */
720
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
721
                iotlb = PHYS_SECTION_WATCH + paddr;
722
                *address |= TLB_MMIO;
723
                break;
724
            }
725
        }
726
    }
727

    
728
    return iotlb;
729
}
730
#endif /* defined(CONFIG_USER_ONLY) */
731

    
732
#if !defined(CONFIG_USER_ONLY)
733

    
734
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
735
                             uint16_t section);
736
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
737

    
738
static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
739

    
740
/*
741
 * Set a custom physical guest memory allocator.
742
 * Accelerators with unusual needs may need this.  Hopefully, we can
743
 * get rid of it eventually.
744
 */
745
void phys_mem_set_alloc(void *(*alloc)(size_t))
746
{
747
    phys_mem_alloc = alloc;
748
}
749

    
750
static uint16_t phys_section_add(MemoryRegionSection *section)
751
{
752
    /* The physical section number is ORed with a page-aligned
753
     * pointer to produce the iotlb entries.  Thus it should
754
     * never overflow into the page-aligned value.
755
     */
756
    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
757

    
758
    if (next_map.sections_nb == next_map.sections_nb_alloc) {
759
        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
760
                                         16);
761
        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
762
                                    next_map.sections_nb_alloc);
763
    }
764
    next_map.sections[next_map.sections_nb] = *section;
765
    memory_region_ref(section->mr);
766
    return next_map.sections_nb++;
767
}
768

    
769
static void phys_section_destroy(MemoryRegion *mr)
770
{
771
    memory_region_unref(mr);
772

    
773
    if (mr->subpage) {
774
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
775
        memory_region_destroy(&subpage->iomem);
776
        g_free(subpage);
777
    }
778
}
779

    
780
static void phys_sections_free(PhysPageMap *map)
781
{
782
    while (map->sections_nb > 0) {
783
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
784
        phys_section_destroy(section->mr);
785
    }
786
    g_free(map->sections);
787
    g_free(map->nodes);
788
    g_free(map);
789
}
790

    
791
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
792
{
793
    subpage_t *subpage;
794
    hwaddr base = section->offset_within_address_space
795
        & TARGET_PAGE_MASK;
796
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
797
                                                   next_map.nodes, next_map.sections);
798
    MemoryRegionSection subsection = {
799
        .offset_within_address_space = base,
800
        .size = int128_make64(TARGET_PAGE_SIZE),
801
    };
802
    hwaddr start, end;
803

    
804
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
805

    
806
    if (!(existing->mr->subpage)) {
807
        subpage = subpage_init(d->as, base);
808
        subsection.mr = &subpage->iomem;
809
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
810
                      phys_section_add(&subsection));
811
    } else {
812
        subpage = container_of(existing->mr, subpage_t, iomem);
813
    }
814
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
815
    end = start + int128_get64(section->size) - 1;
816
    subpage_register(subpage, start, end, phys_section_add(section));
817
}
818

    
819

    
820
static void register_multipage(AddressSpaceDispatch *d,
821
                               MemoryRegionSection *section)
822
{
823
    hwaddr start_addr = section->offset_within_address_space;
824
    uint16_t section_index = phys_section_add(section);
825
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
826
                                                    TARGET_PAGE_BITS));
827

    
828
    assert(num_pages);
829
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
830
}
831

    
832
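/* Add @section to the address space's next dispatch map: any partial page at
 * either end is registered as a subpage, the page-aligned middle as a
 * multipage range.  */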
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
833
{
834
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
835
    AddressSpaceDispatch *d = as->next_dispatch;
836
    MemoryRegionSection now = *section, remain = *section;
837
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
838

    
839
    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
840
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
841
                       - now.offset_within_address_space;
842

    
843
        now.size = int128_min(int128_make64(left), now.size);
844
        register_subpage(d, &now);
845
    } else {
846
        now.size = int128_zero();
847
    }
848
    while (int128_ne(remain.size, now.size)) {
849
        remain.size = int128_sub(remain.size, now.size);
850
        remain.offset_within_address_space += int128_get64(now.size);
851
        remain.offset_within_region += int128_get64(now.size);
852
        now = remain;
853
        if (int128_lt(remain.size, page_size)) {
854
            register_subpage(d, &now);
855
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
856
            now.size = page_size;
857
            register_subpage(d, &now);
858
        } else {
859
            now.size = int128_and(now.size, int128_neg(page_size));
860
            register_multipage(d, &now);
861
        }
862
    }
863
}
864

    
865
void qemu_flush_coalesced_mmio_buffer(void)
866
{
867
    if (kvm_enabled())
868
        kvm_flush_coalesced_mmio_buffer();
869
}
870

    
871
void qemu_mutex_lock_ramlist(void)
872
{
873
    qemu_mutex_lock(&ram_list.mutex);
874
}
875

    
876
void qemu_mutex_unlock_ramlist(void)
877
{
878
    qemu_mutex_unlock(&ram_list.mutex);
879
}
880

    
881
#ifdef __linux__
882

    
883
#include <sys/vfs.h>
884

    
885
#define HUGETLBFS_MAGIC       0x958458f6
886

    
887
static long gethugepagesize(const char *path)
888
{
889
    struct statfs fs;
890
    int ret;
891

    
892
    do {
893
        ret = statfs(path, &fs);
894
    } while (ret != 0 && errno == EINTR);
895

    
896
    if (ret != 0) {
897
        perror(path);
898
        return 0;
899
    }
900

    
901
    if (fs.f_type != HUGETLBFS_MAGIC)
902
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
903

    
904
    return fs.f_bsize;
905
}
906

    
907
static sigjmp_buf sigjump;
908

    
909
static void sigbus_handler(int signal)
910
{
911
    siglongjmp(sigjump, 1);
912
}
913

    
914
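/* Back @block with an mmap()ed temporary file created under @path (normally
 * a hugetlbfs mount).  When mem_prealloc is set, every huge page is touched
 * up front, since MAP_POPULATE would silently ignore failures.  */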
static void *file_ram_alloc(RAMBlock *block,
915
                            ram_addr_t memory,
916
                            const char *path)
917
{
918
    char *filename;
919
    char *sanitized_name;
920
    char *c;
921
    void *area;
922
    int fd;
923
    unsigned long hpagesize;
924

    
925
    hpagesize = gethugepagesize(path);
926
    if (!hpagesize) {
927
        return NULL;
928
    }
929

    
930
    if (memory < hpagesize) {
931
        return NULL;
932
    }
933

    
934
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
935
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
936
        return NULL;
937
    }
938

    
939
    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
940
    sanitized_name = g_strdup(block->mr->name);
941
    for (c = sanitized_name; *c != '\0'; c++) {
942
        if (*c == '/')
943
            *c = '_';
944
    }
945

    
946
    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
947
                               sanitized_name);
948
    g_free(sanitized_name);
949

    
950
    fd = mkstemp(filename);
951
    if (fd < 0) {
952
        perror("unable to create backing store for hugepages");
953
        g_free(filename);
954
        return NULL;
955
    }
956
    unlink(filename);
957
    g_free(filename);
958

    
959
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
960

    
961
    /*
962
     * ftruncate is not supported by hugetlbfs in older
963
     * hosts, so don't bother bailing out on errors.
964
     * If anything goes wrong with it under other filesystems,
965
     * mmap will fail.
966
     */
967
    if (ftruncate(fd, memory))
968
        perror("ftruncate");
969

    
970
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
971
    if (area == MAP_FAILED) {
972
        perror("file_ram_alloc: can't mmap RAM pages");
973
        close(fd);
974
        return (NULL);
975
    }
976

    
977
    if (mem_prealloc) {
978
        int ret, i;
979
        struct sigaction act, oldact;
980
        sigset_t set, oldset;
981

    
982
        memset(&act, 0, sizeof(act));
983
        act.sa_handler = &sigbus_handler;
984
        act.sa_flags = 0;
985

    
986
        ret = sigaction(SIGBUS, &act, &oldact);
987
        if (ret) {
988
            perror("file_ram_alloc: failed to install signal handler");
989
            exit(1);
990
        }
991

    
992
        /* unblock SIGBUS */
993
        sigemptyset(&set);
994
        sigaddset(&set, SIGBUS);
995
        pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
996

    
997
        if (sigsetjmp(sigjump, 1)) {
998
            fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
999
            exit(1);
1000
        }
1001

    
1002
        /* MAP_POPULATE silently ignores failures */
1003
        for (i = 0; i < (memory/hpagesize)-1; i++) {
1004
            memset(area + (hpagesize*i), 0, 1);
1005
        }
1006

    
1007
        ret = sigaction(SIGBUS, &oldact, NULL);
1008
        if (ret) {
1009
            perror("file_ram_alloc: failed to reinstall signal handler");
1010
            exit(1);
1011
        }
1012

    
1013
        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1014
    }
1015

    
1016
    block->fd = fd;
1017
    return area;
1018
}
1019
#else
1020
static void *file_ram_alloc(RAMBlock *block,
1021
                            ram_addr_t memory,
1022
                            const char *path)
1023
{
1024
    fprintf(stderr, "-mem-path not supported on this host\n");
1025
    exit(1);
1026
}
1027
#endif
1028

    
1029
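/* Best-fit search for a gap of at least @size bytes in the ram_addr_t space,
 * considering the holes between the currently registered RAM blocks.  */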
static ram_addr_t find_ram_offset(ram_addr_t size)
1030
{
1031
    RAMBlock *block, *next_block;
1032
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1033

    
1034
    assert(size != 0); /* it would hand out same offset multiple times */
1035

    
1036
    if (QTAILQ_EMPTY(&ram_list.blocks))
1037
        return 0;
1038

    
1039
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1040
        ram_addr_t end, next = RAM_ADDR_MAX;
1041

    
1042
        end = block->offset + block->length;
1043

    
1044
        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1045
            if (next_block->offset >= end) {
1046
                next = MIN(next, next_block->offset);
1047
            }
1048
        }
1049
        if (next - end >= size && next - end < mingap) {
1050
            offset = end;
1051
            mingap = next - end;
1052
        }
1053
    }
1054

    
1055
    if (offset == RAM_ADDR_MAX) {
1056
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1057
                (uint64_t)size);
1058
        abort();
1059
    }
1060

    
1061
    return offset;
1062
}
1063

    
1064
ram_addr_t last_ram_offset(void)
1065
{
1066
    RAMBlock *block;
1067
    ram_addr_t last = 0;
1068

    
1069
    QTAILQ_FOREACH(block, &ram_list.blocks, next)
1070
        last = MAX(last, block->offset + block->length);
1071

    
1072
    return last;
1073
}
1074

    
1075
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1076
{
1077
    int ret;
1078

    
1079
    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1080
    if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1081
                           "dump-guest-core", true)) {
1082
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1083
        if (ret) {
1084
            perror("qemu_madvise");
1085
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1086
                            "but dump_guest_core=off specified\n");
1087
        }
1088
    }
1089
}
1090

    
1091
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1092
{
1093
    RAMBlock *new_block, *block;
1094

    
1095
    new_block = NULL;
1096
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1097
        if (block->offset == addr) {
1098
            new_block = block;
1099
            break;
1100
        }
1101
    }
1102
    assert(new_block);
1103
    assert(!new_block->idstr[0]);
1104

    
1105
    if (dev) {
1106
        char *id = qdev_get_dev_path(dev);
1107
        if (id) {
1108
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1109
            g_free(id);
1110
        }
1111
    }
1112
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1113

    
1114
    /* This assumes the iothread lock is taken here too.  */
1115
    qemu_mutex_lock_ramlist();
1116
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1117
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1118
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1119
                    new_block->idstr);
1120
            abort();
1121
        }
1122
    }
1123
    qemu_mutex_unlock_ramlist();
1124
}
1125

    
1126
static int memory_try_enable_merging(void *addr, size_t len)
1127
{
1128
    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1129
        /* disabled by the user */
1130
        return 0;
1131
    }
1132

    
1133
    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1134
}
1135

    
1136
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1137
                                   MemoryRegion *mr)
1138
{
1139
    RAMBlock *block, *new_block;
1140

    
1141
    size = TARGET_PAGE_ALIGN(size);
1142
    new_block = g_malloc0(sizeof(*new_block));
1143
    new_block->fd = -1;
1144

    
1145
    /* This assumes the iothread lock is taken here too.  */
1146
    qemu_mutex_lock_ramlist();
1147
    new_block->mr = mr;
1148
    new_block->offset = find_ram_offset(size);
1149
    if (host) {
1150
        new_block->host = host;
1151
        new_block->flags |= RAM_PREALLOC_MASK;
1152
    } else if (xen_enabled()) {
1153
        if (mem_path) {
1154
            fprintf(stderr, "-mem-path not supported with Xen\n");
1155
            exit(1);
1156
        }
1157
        xen_ram_alloc(new_block->offset, size, mr);
1158
    } else {
1159
        if (mem_path) {
1160
            if (phys_mem_alloc != qemu_anon_ram_alloc) {
1161
                /*
1162
                 * file_ram_alloc() needs to allocate just like
1163
                 * phys_mem_alloc, but we haven't bothered to provide
1164
                 * a hook there.
1165
                 */
1166
                fprintf(stderr,
1167
                        "-mem-path not supported with this accelerator\n");
1168
                exit(1);
1169
            }
1170
            new_block->host = file_ram_alloc(new_block, size, mem_path);
1171
        }
1172
        if (!new_block->host) {
1173
            new_block->host = phys_mem_alloc(size);
1174
            if (!new_block->host) {
1175
                fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1176
                        new_block->mr->name, strerror(errno));
1177
                exit(1);
1178
            }
1179
            memory_try_enable_merging(new_block->host, size);
1180
        }
1181
    }
1182
    new_block->length = size;
1183

    
1184
    /* Keep the list sorted from biggest to smallest block.  */
1185
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1186
        if (block->length < new_block->length) {
1187
            break;
1188
        }
1189
    }
1190
    if (block) {
1191
        QTAILQ_INSERT_BEFORE(block, new_block, next);
1192
    } else {
1193
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1194
    }
1195
    ram_list.mru_block = NULL;
1196

    
1197
    ram_list.version++;
1198
    qemu_mutex_unlock_ramlist();
1199

    
1200
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1201
                                       last_ram_offset() >> TARGET_PAGE_BITS);
1202
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1203
           0, size >> TARGET_PAGE_BITS);
1204
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1205

    
1206
    qemu_ram_setup_dump(new_block->host, size);
1207
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1208
    qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1209

    
1210
    if (kvm_enabled())
1211
        kvm_setup_guest_memory(new_block->host, size);
1212

    
1213
    return new_block->offset;
1214
}
1215

    
1216
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1217
{
1218
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
1219
}
1220

    
1221
void qemu_ram_free_from_ptr(ram_addr_t addr)
1222
{
1223
    RAMBlock *block;
1224

    
1225
    /* This assumes the iothread lock is taken here too.  */
1226
    qemu_mutex_lock_ramlist();
1227
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1228
        if (addr == block->offset) {
1229
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1230
            ram_list.mru_block = NULL;
1231
            ram_list.version++;
1232
            g_free(block);
1233
            break;
1234
        }
1235
    }
1236
    qemu_mutex_unlock_ramlist();
1237
}
1238

    
1239
void qemu_ram_free(ram_addr_t addr)
1240
{
1241
    RAMBlock *block;
1242

    
1243
    /* This assumes the iothread lock is taken here too.  */
1244
    qemu_mutex_lock_ramlist();
1245
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1246
        if (addr == block->offset) {
1247
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1248
            ram_list.mru_block = NULL;
1249
            ram_list.version++;
1250
            if (block->flags & RAM_PREALLOC_MASK) {
1251
                ;
1252
            } else if (xen_enabled()) {
1253
                xen_invalidate_map_cache_entry(block->host);
1254
#ifndef _WIN32
1255
            } else if (block->fd >= 0) {
1256
                munmap(block->host, block->length);
1257
                close(block->fd);
1258
#endif
1259
            } else {
1260
                qemu_anon_ram_free(block->host, block->length);
1261
            }
1262
            g_free(block);
1263
            break;
1264
        }
1265
    }
1266
    qemu_mutex_unlock_ramlist();
1267

    
1268
}
1269

    
1270
#ifndef _WIN32
1271
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1272
{
1273
    RAMBlock *block;
1274
    ram_addr_t offset;
1275
    int flags;
1276
    void *area, *vaddr;
1277

    
1278
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1279
        offset = addr - block->offset;
1280
        if (offset < block->length) {
1281
            vaddr = block->host + offset;
1282
            if (block->flags & RAM_PREALLOC_MASK) {
1283
                ;
1284
            } else if (xen_enabled()) {
1285
                abort();
1286
            } else {
1287
                flags = MAP_FIXED;
1288
                munmap(vaddr, length);
1289
                if (block->fd >= 0) {
1290
#ifdef MAP_POPULATE
1291
                    flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1292
                        MAP_PRIVATE;
1293
#else
1294
                    flags |= MAP_PRIVATE;
1295
#endif
1296
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1297
                                flags, block->fd, offset);
1298
                } else {
1299
                    /*
1300
                     * Remap needs to match alloc.  Accelerators that
1301
                     * set phys_mem_alloc never remap.  If they did,
1302
                     * we'd need a remap hook here.
1303
                     */
1304
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);
1305

    
1306
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1307
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1308
                                flags, -1, 0);
1309
                }
1310
                if (area != vaddr) {
1311
                    fprintf(stderr, "Could not remap addr: "
1312
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1313
                            length, addr);
1314
                    exit(1);
1315
                }
1316
                memory_try_enable_merging(vaddr, length);
1317
                qemu_ram_setup_dump(vaddr, length);
1318
            }
1319
            return;
1320
        }
1321
    }
1322
}
1323
#endif /* !_WIN32 */
1324

    
1325
/* Return a host pointer to ram allocated with qemu_ram_alloc.
1326
   With the exception of the softmmu code in this file, this should
1327
   only be used for local memory (e.g. video ram) that the device owns,
1328
   and knows it isn't going to access beyond the end of the block.
1329

1330
   It should not be used for general purpose DMA.
1331
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1332
 */
1333
void *qemu_get_ram_ptr(ram_addr_t addr)
1334
{
1335
    RAMBlock *block = qemu_get_ram_block(addr);
1336

    
1337
    if (xen_enabled()) {
1338
        /* We need to check if the requested address is in RAM
1339
         * because we don't want to map the entire memory in QEMU.
1340
         * In that case just map until the end of the page.
1341
         */
1342
        if (block->offset == 0) {
1343
            return xen_map_cache(addr, 0, 0);
1344
        } else if (block->host == NULL) {
1345
            block->host =
1346
                xen_map_cache(block->offset, block->length, 1);
1347
        }
1348
    }
1349
    return block->host + (addr - block->offset);
1350
}
1351

    
1352
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1353
 * but takes a size argument */
1354
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1355
{
1356
    if (*size == 0) {
1357
        return NULL;
1358
    }
1359
    if (xen_enabled()) {
1360
        return xen_map_cache(addr, *size, 1);
1361
    } else {
1362
        RAMBlock *block;
1363

    
1364
        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1365
            if (addr - block->offset < block->length) {
1366
                if (addr - block->offset + *size > block->length)
1367
                    *size = block->length - addr + block->offset;
1368
                return block->host + (addr - block->offset);
1369
            }
1370
        }
1371

    
1372
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1373
        abort();
1374
    }
1375
}
1376

    
1377
/* Some of the softmmu routines need to translate from a host pointer
1378
   (typically a TLB entry) back to a ram offset.  */
1379
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1380
{
1381
    RAMBlock *block;
1382
    uint8_t *host = ptr;
1383

    
1384
    if (xen_enabled()) {
1385
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1386
        return qemu_get_ram_block(*ram_addr)->mr;
1387
    }
1388

    
1389
    block = ram_list.mru_block;
1390
    if (block && block->host && host - block->host < block->length) {
1391
        goto found;
1392
    }
1393

    
1394
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1395
        /* This case happens when the block is not mapped. */
1396
        if (block->host == NULL) {
1397
            continue;
1398
        }
1399
        if (host - block->host < block->length) {
1400
            goto found;
1401
        }
1402
    }
1403

    
1404
    return NULL;
1405

    
1406
found:
1407
    *ram_addr = block->offset + (host - block->host);
1408
    return block->mr;
1409
}
1410

    
1411
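/* Slow-path write handler for RAM pages whose dirty bits are clear:
 * invalidate any translated code on the page, perform the store, update the
 * dirty flags and, once the page is fully dirty, drop the notdirty TLB entry
 * so that later writes go straight to RAM.  */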
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1412
                               uint64_t val, unsigned size)
1413
{
1414
    int dirty_flags;
1415
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1416
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1417
        tb_invalidate_phys_page_fast(ram_addr, size);
1418
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1419
    }
1420
    switch (size) {
1421
    case 1:
1422
        stb_p(qemu_get_ram_ptr(ram_addr), val);
1423
        break;
1424
    case 2:
1425
        stw_p(qemu_get_ram_ptr(ram_addr), val);
1426
        break;
1427
    case 4:
1428
        stl_p(qemu_get_ram_ptr(ram_addr), val);
1429
        break;
1430
    default:
1431
        abort();
1432
    }
1433
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1434
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1435
    /* we remove the notdirty callback only if the code has been
1436
       flushed */
1437
    if (dirty_flags == 0xff) {
1438
        CPUArchState *env = current_cpu->env_ptr;
1439
        tlb_set_dirty(env, env->mem_io_vaddr);
1440
    }
1441
}
1442

    
1443
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1444
                                 unsigned size, bool is_write)
1445
{
1446
    return is_write;
1447
}
1448

    
1449
static const MemoryRegionOps notdirty_mem_ops = {
1450
    .write = notdirty_mem_write,
1451
    .valid.accepts = notdirty_mem_accepts,
1452
    .endianness = DEVICE_NATIVE_ENDIAN,
1453
};
1454

    
1455
/* Generate a debug exception if a watchpoint has been hit.  */
1456
static void check_watchpoint(int offset, int len_mask, int flags)
1457
{
1458
    CPUArchState *env = current_cpu->env_ptr;
1459
    target_ulong pc, cs_base;
1460
    target_ulong vaddr;
1461
    CPUWatchpoint *wp;
1462
    int cpu_flags;
1463

    
1464
    if (env->watchpoint_hit) {
1465
        /* We re-entered the check after replacing the TB. Now raise
1466
         * the debug interrupt so that it will trigger after the
1467
         * current instruction. */
1468
        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1469
        return;
1470
    }
1471
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1472
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1473
        if ((vaddr == (wp->vaddr & len_mask) ||
1474
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1475
            wp->flags |= BP_WATCHPOINT_HIT;
1476
            if (!env->watchpoint_hit) {
1477
                env->watchpoint_hit = wp;
1478
                tb_check_watchpoint(env);
1479
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1480
                    env->exception_index = EXCP_DEBUG;
1481
                    cpu_loop_exit(env);
1482
                } else {
1483
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1484
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1485
                    cpu_resume_from_signal(env, NULL);
1486
                }
1487
            }
1488
        } else {
1489
            wp->flags &= ~BP_WATCHPOINT_HIT;
1490
        }
1491
    }
1492
}
1493

    
1494
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1495
   so these check for a hit then pass through to the normal out-of-line
1496
   phys routines.  */
1497
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1498
                               unsigned size)
1499
{
1500
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1501
    switch (size) {
1502
    case 1: return ldub_phys(addr);
1503
    case 2: return lduw_phys(addr);
1504
    case 4: return ldl_phys(addr);
1505
    default: abort();
1506
    }
1507
}
1508

    
1509
static void watch_mem_write(void *opaque, hwaddr addr,
1510
                            uint64_t val, unsigned size)
1511
{
1512
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1513
    switch (size) {
1514
    case 1:
1515
        stb_phys(addr, val);
1516
        break;
1517
    case 2:
1518
        stw_phys(addr, val);
1519
        break;
1520
    case 4:
1521
        stl_phys(addr, val);
1522
        break;
1523
    default: abort();
1524
    }
1525
}
1526

    
1527
static const MemoryRegionOps watch_mem_ops = {
1528
    .read = watch_mem_read,
1529
    .write = watch_mem_write,
1530
    .endianness = DEVICE_NATIVE_ENDIAN,
1531
};
1532

    
1533
static uint64_t subpage_read(void *opaque, hwaddr addr,
1534
                             unsigned len)
1535
{
1536
    subpage_t *subpage = opaque;
1537
    uint8_t buf[4];
1538

    
1539
#if defined(DEBUG_SUBPAGE)
1540
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1541
           subpage, len, addr);
1542
#endif
1543
    address_space_read(subpage->as, addr + subpage->base, buf, len);
1544
    switch (len) {
1545
    case 1:
1546
        return ldub_p(buf);
1547
    case 2:
1548
        return lduw_p(buf);
1549
    case 4:
1550
        return ldl_p(buf);
1551
    default:
1552
        abort();
1553
    }
1554
}
1555

    
1556
static void subpage_write(void *opaque, hwaddr addr,
1557
                          uint64_t value, unsigned len)
1558
{
1559
    subpage_t *subpage = opaque;
1560
    uint8_t buf[4];
1561

    
1562
#if defined(DEBUG_SUBPAGE)
1563
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1564
           " value %"PRIx64"\n",
1565
           __func__, subpage, len, addr, value);
1566
#endif
1567
    switch (len) {
1568
    case 1:
1569
        stb_p(buf, value);
1570
        break;
1571
    case 2:
1572
        stw_p(buf, value);
1573
        break;
1574
    case 4:
1575
        stl_p(buf, value);
1576
        break;
1577
    default:
1578
        abort();
1579
    }
1580
    address_space_write(subpage->as, addr + subpage->base, buf, len);
1581
}
1582

    
1583
static bool subpage_accepts(void *opaque, hwaddr addr,
1584
                            unsigned len, bool is_write)
1585
{
1586
    subpage_t *subpage = opaque;
1587
#if defined(DEBUG_SUBPAGE)
1588
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1589
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
1590
#endif
1591

    
1592
    return address_space_access_valid(subpage->as, addr + subpage->base,
1593
                                      len, is_write);
1594
}
1595

    
1596
static const MemoryRegionOps subpage_ops = {
1597
    .read = subpage_read,
1598
    .write = subpage_write,
1599
    .valid.accepts = subpage_accepts,
1600
    .endianness = DEVICE_NATIVE_ENDIAN,
1601
};
1602

    
1603
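/* Point the sub-section entries covering [start, end] within @mmio at
 * @section.  */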
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1604
                             uint16_t section)
1605
{
1606
    int idx, eidx;
1607

    
1608
    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1609
        return -1;
1610
    idx = SUBPAGE_IDX(start);
1611
    eidx = SUBPAGE_IDX(end);
1612
#if defined(DEBUG_SUBPAGE)
1613
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1614
           __func__, mmio, start, end, idx, eidx, section);
1615
#endif
1616
    for (; idx <= eidx; idx++) {
1617
        mmio->sub_section[idx] = section;
1618
    }
1619

    
1620
    return 0;
1621
}
1622

    
1623
static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1624
{
1625
    subpage_t *mmio;
1626

    
1627
    mmio = g_malloc0(sizeof(subpage_t));
1628

    
1629
    mmio->as = as;
1630
    mmio->base = base;
1631
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1632
                          "subpage", TARGET_PAGE_SIZE);
1633
    mmio->iomem.subpage = true;
1634
#if defined(DEBUG_SUBPAGE)
1635
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1636
           mmio, base, TARGET_PAGE_SIZE);
1637
#endif
1638
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1639

    
1640
    return mmio;
1641
}
1642

    
1643
static uint16_t dummy_section(MemoryRegion *mr)
1644
{
1645
    MemoryRegionSection section = {
1646
        .mr = mr,
1647
        .offset_within_address_space = 0,
1648
        .offset_within_region = 0,
1649
        .size = int128_2_64(),
1650
    };
1651

    
1652
    return phys_section_add(&section);
1653
}
1654

    
1655
MemoryRegion *iotlb_to_region(hwaddr index)
1656
{
1657
    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
1658
}
1659

    
1660
static void io_mem_init(void)
1661
{
1662
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1663
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1664
                          "unassigned", UINT64_MAX);
1665
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1666
                          "notdirty", UINT64_MAX);
1667
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1668
                          "watch", UINT64_MAX);
1669
}
1670

    
1671
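/* Start building a new AddressSpaceDispatch for this address space; it is
 * made visible by mem_commit() once the memory transaction completes.  */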
static void mem_begin(MemoryListener *listener)
1672
{
1673
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1674
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1675

    
1676
    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1677
    d->as = as;
1678
    as->next_dispatch = d;
1679
}
1680

    
1681
static void mem_commit(MemoryListener *listener)
1682
{
1683
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1684
    AddressSpaceDispatch *cur = as->dispatch;
1685
    AddressSpaceDispatch *next = as->next_dispatch;
1686

    
1687
    next->nodes = next_map.nodes;
1688
    next->sections = next_map.sections;
1689

    
1690
    as->dispatch = next;
1691
    g_free(cur);
1692
}
1693

    
1694
static void core_begin(MemoryListener *listener)
{
    uint16_t n;

    prev_map = g_new(PhysPageMap, 1);
    *prev_map = next_map;

    memset(&next_map, 0, sizeof(next_map));
    n = dummy_section(&io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);
}

/* This listener's commit runs after the other AddressSpaceDispatch listeners'.
 * All AddressSpaceDispatch instances have switched to the next map.
 */
static void core_commit(MemoryListener *listener)
{
    phys_sections_free(prev_map);
}

static void tcg_commit(MemoryListener *listener)
{
    CPUState *cpu;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&as->dispatch_listener);
    g_free(d);
    as->dispatch = NULL;
}

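/* Create the root "system" and "io" memory regions, wrap them in the global
 * memory and I/O address spaces, and register the core (and, for TCG, the
 * TLB-flushing) listeners.
 */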
static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, NULL, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");

    memory_listener_register(&core_memory_listener, &address_space_memory);
    if (tcg_enabled()) {
        memory_listener_register(&tcg_memory_listener, &address_space_memory);
    }
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

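/* Called after a direct write to RAM: if the range was not already dirty,
 * invalidate any translated code in it and set the dirty flags (all except
 * CODE_DIRTY_FLAG).  Xen is always told about the modification.
 */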
static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

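/* True if the access can be performed with a plain memcpy on the host
 * mapping of @mr; false if it has to go through the MMIO callbacks.
 */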
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    if (l & (l - 1)) {
        l = 1 << (qemu_fls(l) - 1);
    }

    return l;
}

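/* Copy @len bytes between @buf and guest physical memory at @addr,
 * splitting the transfer at region boundaries.  RAM/ROMD regions are
 * accessed directly; everything else goes through io_mem_read/write in the
 * largest chunks the region accepts.  Errors reported by those callbacks
 * are accumulated into the return value.
 */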
bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    error |= io_mem_write(mr, addr1, val, 8);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                    break;
                default:
                    abort();
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 8);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}

bool address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    return address_space_rw(as, addr, buf, len, false);
}


void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

/* used for ROM loading: can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    while (len > 0) {
        l = len;
        mr = address_space_translate(&address_space_memory,
                                     addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

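/* There is only one bounce buffer, so an indirect (MMIO-backed) mapping
 * request can fail while it is in use.  Map clients registered here are
 * notified from cpu_notify_map_clients() once address_space_unmap()
 * releases the bounce buffer, which is the point where a retry may succeed.
 */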
void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
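/* Illustrative use only (not called from this file; as/gpa/size are
 * placeholder names): map for reading, consume up to the returned length,
 * then unmap with the amount actually used.
 *
 *     hwaddr plen = size;
 *     void *p = address_space_map(as, gpa, &plen, false);
 *     if (p) {
 *         ... read up to plen bytes from p ...
 *         address_space_unmap(as, p, plen, false, plen);
 *     }
 */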
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (bounce.buffer) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

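/* The ld*_phys/st*_phys helpers below access guest physical memory through
 * address_space_memory, honouring the requested device endianness.  Each
 * one reads or writes a single value of the given width; bulk transfers go
 * through address_space_rw().
 */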
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
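/* Return true if @phys_addr is backed by MMIO rather than RAM or ROMD. */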
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}

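/* Call @func for every RAMBlock currently on the ram_list. */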
void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->length, opaque);
    }
}
#endif