1
/*
2
 *  Virtual page mapping
3
 *
4
 *  Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include "config.h"
20
#ifdef _WIN32
21
#include <windows.h>
22
#else
23
#include <sys/types.h>
24
#include <sys/mman.h>
25
#endif
26

    
27
#include "qemu-common.h"
28
#include "cpu.h"
29
#include "tcg.h"
30
#include "hw/hw.h"
31
#include "hw/qdev.h"
32
#include "qemu/osdep.h"
33
#include "sysemu/kvm.h"
34
#include "hw/xen/xen.h"
35
#include "qemu/timer.h"
36
#include "qemu/config-file.h"
37
#include "exec/memory.h"
38
#include "sysemu/dma.h"
39
#include "exec/address-spaces.h"
40
#if defined(CONFIG_USER_ONLY)
41
#include <qemu.h>
42
#else /* !CONFIG_USER_ONLY */
43
#include "sysemu/xen-mapcache.h"
44
#include "trace.h"
45
#endif
46
#include "exec/cpu-all.h"
47

    
48
#include "exec/cputlb.h"
49
#include "translate-all.h"
50

    
51
#include "exec/memory-internal.h"
52

    
53
//#define DEBUG_SUBPAGE
54

    
55
#if !defined(CONFIG_USER_ONLY)
56
int phys_ram_fd;
57
static int in_migration;
58

    
59
RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
60

    
61
static MemoryRegion *system_memory;
62
static MemoryRegion *system_io;
63

    
64
AddressSpace address_space_io;
65
AddressSpace address_space_memory;
66

    
67
MemoryRegion io_mem_rom, io_mem_notdirty;
68
static MemoryRegion io_mem_unassigned;
69

    
70
#endif
71

    
72
CPUArchState *first_cpu;
73
/* current CPU in the current thread. It is only valid inside
74
   cpu_exec() */
75
DEFINE_TLS(CPUArchState *,cpu_single_env);
76
/* 0 = Do not count executed instructions.
77
   1 = Precise instruction counting.
78
   2 = Adaptive rate instruction counting.  */
79
int use_icount;
80

    
81
#if !defined(CONFIG_USER_ONLY)
82

    
83
typedef struct PhysPageEntry PhysPageEntry;
84

    
85
struct PhysPageEntry {
86
    uint16_t is_leaf : 1;
87
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
88
    uint16_t ptr : 15;
89
};
90

    
91
struct AddressSpaceDispatch {
92
    /* This is a multi-level map on the physical address space.
93
     * The bottom level has pointers to MemoryRegionSections.
94
     */
95
    PhysPageEntry phys_map;
96
    MemoryListener listener;
97
    AddressSpace *as;
98
};
99

    
100
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
101
typedef struct subpage_t {
102
    MemoryRegion iomem;
103
    AddressSpace *as;
104
    hwaddr base;
105
    uint16_t sub_section[TARGET_PAGE_SIZE];
106
} subpage_t;
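
/* A subpage covers a single target page whose contents are split between
 * several MemoryRegionSections (e.g. a small MMIO region that does not fill a
 * whole page).  sub_section[] maps each byte offset within the page, computed
 * by SUBPAGE_IDX(), to the phys_sections index that should handle it; see
 * subpage_register() and register_subpage() below.
 */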
107

    
108
static MemoryRegionSection *phys_sections;
109
static unsigned phys_sections_nb, phys_sections_nb_alloc;
110
static uint16_t phys_section_unassigned;
111
static uint16_t phys_section_notdirty;
112
static uint16_t phys_section_rom;
113
static uint16_t phys_section_watch;
114

    
115
/* Simple allocator for PhysPageEntry nodes */
116
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
117
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
118

    
119
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
120

    
121
static void io_mem_init(void);
122
static void memory_map_init(void);
123
static void *qemu_safe_ram_ptr(ram_addr_t addr);
124

    
125
static MemoryRegion io_mem_watch;
126
#endif
127

    
128
#if !defined(CONFIG_USER_ONLY)
129

    
130
static void phys_map_node_reserve(unsigned nodes)
131
{
132
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
133
        typedef PhysPageEntry Node[L2_SIZE];
134
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
135
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
136
                                      phys_map_nodes_nb + nodes);
137
        phys_map_nodes = g_renew(Node, phys_map_nodes,
138
                                 phys_map_nodes_nb_alloc);
139
    }
140
}
141

    
142
static uint16_t phys_map_node_alloc(void)
143
{
144
    unsigned i;
145
    uint16_t ret;
146

    
147
    ret = phys_map_nodes_nb++;
148
    assert(ret != PHYS_MAP_NODE_NIL);
149
    assert(ret != phys_map_nodes_nb_alloc);
150
    for (i = 0; i < L2_SIZE; ++i) {
151
        phys_map_nodes[ret][i].is_leaf = 0;
152
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
153
    }
154
    return ret;
155
}
156

    
157
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
158
                                hwaddr *nb, uint16_t leaf,
159
                                int level)
160
{
161
    PhysPageEntry *p;
162
    int i;
163
    hwaddr step = (hwaddr)1 << (level * L2_BITS);
164

    
165
    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
166
        lp->ptr = phys_map_node_alloc();
167
        p = phys_map_nodes[lp->ptr];
168
        if (level == 0) {
169
            for (i = 0; i < L2_SIZE; i++) {
170
                p[i].is_leaf = 1;
171
                p[i].ptr = phys_section_unassigned;
172
            }
173
        }
174
    } else {
175
        p = phys_map_nodes[lp->ptr];
176
    }
177
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
178

    
179
    while (*nb && lp < &p[L2_SIZE]) {
180
        if ((*index & (step - 1)) == 0 && *nb >= step) {
181
            lp->is_leaf = true;
182
            lp->ptr = leaf;
183
            *index += step;
184
            *nb -= step;
185
        } else {
186
            phys_page_set_level(lp, index, nb, leaf, level - 1);
187
        }
188
        ++lp;
189
    }
190
}
191

    
192
static void phys_page_set(AddressSpaceDispatch *d,
193
                          hwaddr index, hwaddr nb,
194
                          uint16_t leaf)
195
{
196
    /* Wildly overreserve - it doesn't matter much. */
197
    phys_map_node_reserve(3 * P_L2_LEVELS);
198

    
199
    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
200
}
201

    
202
static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
203
{
204
    PhysPageEntry lp = d->phys_map;
205
    PhysPageEntry *p;
206
    int i;
207

    
208
    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
209
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
210
            return &phys_sections[phys_section_unassigned];
211
        }
212
        p = phys_map_nodes[lp.ptr];
213
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
214
    }
215
    return &phys_sections[lp.ptr];
216
}
217

    
218
bool memory_region_is_unassigned(MemoryRegion *mr)
219
{
220
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
221
        && mr != &io_mem_watch;
222
}
223

    
224
static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
225
                                                        hwaddr addr,
226
                                                        bool resolve_subpage)
227
{
228
    MemoryRegionSection *section;
229
    subpage_t *subpage;
230

    
231
    section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
232
    if (resolve_subpage && section->mr->subpage) {
233
        subpage = container_of(section->mr, subpage_t, iomem);
234
        section = &phys_sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
235
    }
236
    return section;
237
}
238

    
239
static MemoryRegionSection *
240
address_space_translate_internal(AddressSpace *as, hwaddr addr, hwaddr *xlat,
241
                                 hwaddr *plen, bool resolve_subpage)
242
{
243
    MemoryRegionSection *section;
244
    Int128 diff;
245

    
246
    section = address_space_lookup_region(as, addr, resolve_subpage);
247
    /* Compute offset within MemoryRegionSection */
248
    addr -= section->offset_within_address_space;
249

    
250
    /* Compute offset within MemoryRegion */
251
    *xlat = addr + section->offset_within_region;
252

    
253
    diff = int128_sub(section->mr->size, int128_make64(addr));
254
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
255
    return section;
256
}
257

    
258
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
259
                                      hwaddr *xlat, hwaddr *plen,
260
                                      bool is_write)
261
{
262
    IOMMUTLBEntry iotlb;
263
    MemoryRegionSection *section;
264
    MemoryRegion *mr;
265
    hwaddr len = *plen;
266

    
267
    for (;;) {
268
        section = address_space_translate_internal(as, addr, &addr, plen, true);
269
        mr = section->mr;
270

    
271
        if (!mr->iommu_ops) {
272
            break;
273
        }
274

    
275
        iotlb = mr->iommu_ops->translate(mr, addr);
276
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
277
                | (addr & iotlb.addr_mask));
278
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
279
        if (!(iotlb.perm & (1 << is_write))) {
280
            mr = &io_mem_unassigned;
281
            break;
282
        }
283

    
284
        as = iotlb.target_as;
285
    }
286

    
287
    *plen = len;
288
    *xlat = addr;
289
    return mr;
290
}
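
/* Typical use (illustrative sketch only): callers such as the memory access
 * helpers later in this file translate first, then dispatch on the region:
 *
 *     hwaddr xlat, l = len;
 *     MemoryRegion *mr = address_space_translate(as, addr, &xlat, &l, is_write);
 *     if (memory_region_is_ram(mr)) {
 *         ... access qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + xlat) ...
 *     } else {
 *         ... MMIO read/write on mr at offset xlat ...
 *     }
 *
 * l is clamped so a single access never crosses the end of the region.
 */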
291

    
292
MemoryRegionSection *
293
address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
294
                                  hwaddr *plen)
295
{
296
    MemoryRegionSection *section;
297
    section = address_space_translate_internal(as, addr, xlat, plen, false);
298

    
299
    assert(!section->mr->iommu_ops);
300
    return section;
301
}
302
#endif
303

    
304
void cpu_exec_init_all(void)
305
{
306
#if !defined(CONFIG_USER_ONLY)
307
    qemu_mutex_init(&ram_list.mutex);
308
    memory_map_init();
309
    io_mem_init();
310
#endif
311
}
312

    
313
#if !defined(CONFIG_USER_ONLY)
314

    
315
static int cpu_common_post_load(void *opaque, int version_id)
316
{
317
    CPUState *cpu = opaque;
318

    
319
    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
320
       version_id is increased. */
321
    cpu->interrupt_request &= ~0x01;
322
    tlb_flush(cpu->env_ptr, 1);
323

    
324
    return 0;
325
}
326

    
327
const VMStateDescription vmstate_cpu_common = {
328
    .name = "cpu_common",
329
    .version_id = 1,
330
    .minimum_version_id = 1,
331
    .minimum_version_id_old = 1,
332
    .post_load = cpu_common_post_load,
333
    .fields      = (VMStateField []) {
334
        VMSTATE_UINT32(halted, CPUState),
335
        VMSTATE_UINT32(interrupt_request, CPUState),
336
        VMSTATE_END_OF_LIST()
337
    }
338
};
339

    
340
#endif
341

    
342
CPUState *qemu_get_cpu(int index)
343
{
344
    CPUArchState *env = first_cpu;
345
    CPUState *cpu = NULL;
346

    
347
    while (env) {
348
        cpu = ENV_GET_CPU(env);
349
        if (cpu->cpu_index == index) {
350
            break;
351
        }
352
        env = env->next_cpu;
353
    }
354

    
355
    return env ? cpu : NULL;
356
}
357

    
358
void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
359
{
360
    CPUArchState *env = first_cpu;
361

    
362
    while (env) {
363
        func(ENV_GET_CPU(env), data);
364
        env = env->next_cpu;
365
    }
366
}
367

    
368
void cpu_exec_init(CPUArchState *env)
369
{
370
    CPUState *cpu = ENV_GET_CPU(env);
371
    CPUClass *cc = CPU_GET_CLASS(cpu);
372
    CPUArchState **penv;
373
    int cpu_index;
374

    
375
#if defined(CONFIG_USER_ONLY)
376
    cpu_list_lock();
377
#endif
378
    env->next_cpu = NULL;
379
    penv = &first_cpu;
380
    cpu_index = 0;
381
    while (*penv != NULL) {
382
        penv = &(*penv)->next_cpu;
383
        cpu_index++;
384
    }
385
    cpu->cpu_index = cpu_index;
386
    cpu->numa_node = 0;
387
    QTAILQ_INIT(&env->breakpoints);
388
    QTAILQ_INIT(&env->watchpoints);
389
#ifndef CONFIG_USER_ONLY
390
    cpu->thread_id = qemu_get_thread_id();
391
#endif
392
    *penv = env;
393
#if defined(CONFIG_USER_ONLY)
394
    cpu_list_unlock();
395
#endif
396
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
397
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
398
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
399
                    cpu_save, cpu_load, env);
400
    assert(cc->vmsd == NULL);
401
#endif
402
    if (cc->vmsd != NULL) {
403
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
404
    }
405
}
406

    
407
#if defined(TARGET_HAS_ICE)
408
#if defined(CONFIG_USER_ONLY)
409
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
410
{
411
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
412
}
413
#else
414
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
415
{
416
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
417
            (pc & ~TARGET_PAGE_MASK));
418
}
419
#endif
420
#endif /* TARGET_HAS_ICE */
421

    
422
#if defined(CONFIG_USER_ONLY)
423
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
424

    
425
{
426
}
427

    
428
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
429
                          int flags, CPUWatchpoint **watchpoint)
430
{
431
    return -ENOSYS;
432
}
433
#else
434
/* Add a watchpoint.  */
435
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
436
                          int flags, CPUWatchpoint **watchpoint)
437
{
438
    target_ulong len_mask = ~(len - 1);
439
    CPUWatchpoint *wp;
440

    
441
    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
442
    if ((len & (len - 1)) || (addr & ~len_mask) ||
443
            len == 0 || len > TARGET_PAGE_SIZE) {
444
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
445
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
446
        return -EINVAL;
447
    }
448
    wp = g_malloc(sizeof(*wp));
449

    
450
    wp->vaddr = addr;
451
    wp->len_mask = len_mask;
452
    wp->flags = flags;
453

    
454
    /* keep all GDB-injected watchpoints in front */
455
    if (flags & BP_GDB)
456
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
457
    else
458
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
459

    
460
    tlb_flush_page(env, addr);
461

    
462
    if (watchpoint)
463
        *watchpoint = wp;
464
    return 0;
465
}
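
/* Note: because len is restricted to a power of two and addr is aligned to it,
 * len_mask = ~(len - 1) lets a hit be detected with a single mask-and-compare,
 * e.g. (access_vaddr & wp->len_mask) == wp->vaddr; check_watchpoint() below
 * relies on this encoding.
 */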
466

    
467
/* Remove a specific watchpoint.  */
468
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
469
                          int flags)
470
{
471
    target_ulong len_mask = ~(len - 1);
472
    CPUWatchpoint *wp;
473

    
474
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
475
        if (addr == wp->vaddr && len_mask == wp->len_mask
476
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
477
            cpu_watchpoint_remove_by_ref(env, wp);
478
            return 0;
479
        }
480
    }
481
    return -ENOENT;
482
}
483

    
484
/* Remove a specific watchpoint by reference.  */
485
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
486
{
487
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
488

    
489
    tlb_flush_page(env, watchpoint->vaddr);
490

    
491
    g_free(watchpoint);
492
}
493

    
494
/* Remove all matching watchpoints.  */
495
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
496
{
497
    CPUWatchpoint *wp, *next;
498

    
499
    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
500
        if (wp->flags & mask)
501
            cpu_watchpoint_remove_by_ref(env, wp);
502
    }
503
}
504
#endif
505

    
506
/* Add a breakpoint.  */
507
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
508
                          CPUBreakpoint **breakpoint)
509
{
510
#if defined(TARGET_HAS_ICE)
511
    CPUBreakpoint *bp;
512

    
513
    bp = g_malloc(sizeof(*bp));
514

    
515
    bp->pc = pc;
516
    bp->flags = flags;
517

    
518
    /* keep all GDB-injected breakpoints in front */
519
    if (flags & BP_GDB)
520
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
521
    else
522
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
523

    
524
    breakpoint_invalidate(env, pc);
525

    
526
    if (breakpoint)
527
        *breakpoint = bp;
528
    return 0;
529
#else
530
    return -ENOSYS;
531
#endif
532
}
533

    
534
/* Remove a specific breakpoint.  */
535
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
536
{
537
#if defined(TARGET_HAS_ICE)
538
    CPUBreakpoint *bp;
539

    
540
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
541
        if (bp->pc == pc && bp->flags == flags) {
542
            cpu_breakpoint_remove_by_ref(env, bp);
543
            return 0;
544
        }
545
    }
546
    return -ENOENT;
547
#else
548
    return -ENOSYS;
549
#endif
550
}
551

    
552
/* Remove a specific breakpoint by reference.  */
553
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
554
{
555
#if defined(TARGET_HAS_ICE)
556
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
557

    
558
    breakpoint_invalidate(env, breakpoint->pc);
559

    
560
    g_free(breakpoint);
561
#endif
562
}
563

    
564
/* Remove all matching breakpoints. */
565
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
566
{
567
#if defined(TARGET_HAS_ICE)
568
    CPUBreakpoint *bp, *next;
569

    
570
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
571
        if (bp->flags & mask)
572
            cpu_breakpoint_remove_by_ref(env, bp);
573
    }
574
#endif
575
}
576

    
577
/* enable or disable single step mode. EXCP_DEBUG is returned by the
578
   CPU loop after each instruction */
579
void cpu_single_step(CPUArchState *env, int enabled)
580
{
581
#if defined(TARGET_HAS_ICE)
582
    if (env->singlestep_enabled != enabled) {
583
        env->singlestep_enabled = enabled;
584
        if (kvm_enabled())
585
            kvm_update_guest_debug(env, 0);
586
        else {
587
            /* must flush all the translated code to avoid inconsistencies */
588
            /* XXX: only flush what is necessary */
589
            tb_flush(env);
590
        }
591
    }
592
#endif
593
}
594

    
595
void cpu_abort(CPUArchState *env, const char *fmt, ...)
596
{
597
    CPUState *cpu = ENV_GET_CPU(env);
598
    va_list ap;
599
    va_list ap2;
600

    
601
    va_start(ap, fmt);
602
    va_copy(ap2, ap);
603
    fprintf(stderr, "qemu: fatal: ");
604
    vfprintf(stderr, fmt, ap);
605
    fprintf(stderr, "\n");
606
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
607
    if (qemu_log_enabled()) {
608
        qemu_log("qemu: fatal: ");
609
        qemu_log_vprintf(fmt, ap2);
610
        qemu_log("\n");
611
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
612
        qemu_log_flush();
613
        qemu_log_close();
614
    }
615
    va_end(ap2);
616
    va_end(ap);
617
#if defined(CONFIG_USER_ONLY)
618
    {
619
        struct sigaction act;
620
        sigfillset(&act.sa_mask);
621
        act.sa_handler = SIG_DFL;
622
        sigaction(SIGABRT, &act, NULL);
623
    }
624
#endif
625
    abort();
626
}
627

    
628
CPUArchState *cpu_copy(CPUArchState *env)
629
{
630
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
631
    CPUArchState *next_cpu = new_env->next_cpu;
632
#if defined(TARGET_HAS_ICE)
633
    CPUBreakpoint *bp;
634
    CPUWatchpoint *wp;
635
#endif
636

    
637
    memcpy(new_env, env, sizeof(CPUArchState));
638

    
639
    /* Preserve chaining. */
640
    new_env->next_cpu = next_cpu;
641

    
642
    /* Clone all break/watchpoints.
643
       Note: Once we support ptrace with hw-debug register access, make sure
644
       BP_CPU break/watchpoints are handled correctly on clone. */
645
    QTAILQ_INIT(&env->breakpoints);
646
    QTAILQ_INIT(&env->watchpoints);
647
#if defined(TARGET_HAS_ICE)
648
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
649
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
650
    }
651
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
652
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
653
                              wp->flags, NULL);
654
    }
655
#endif
656

    
657
    return new_env;
658
}
659

    
660
#if !defined(CONFIG_USER_ONLY)
661
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
662
                                      uintptr_t length)
663
{
664
    uintptr_t start1;
665

    
666
    /* we modify the TLB cache so that the dirty bit will be set again
667
       when accessing the range */
668
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
669
    /* Check that we don't span multiple blocks - this breaks the
670
       address comparisons below.  */
671
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
672
            != (end - 1) - start) {
673
        abort();
674
    }
675
    cpu_tlb_reset_dirty_all(start1, length);
676

    
677
}
678

    
679
/* Note: start and end must be within the same ram block.  */
680
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
681
                                     int dirty_flags)
682
{
683
    uintptr_t length;
684

    
685
    start &= TARGET_PAGE_MASK;
686
    end = TARGET_PAGE_ALIGN(end);
687

    
688
    length = end - start;
689
    if (length == 0)
690
        return;
691
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
692

    
693
    if (tcg_enabled()) {
694
        tlb_reset_dirty_range_all(start, end, length);
695
    }
696
}
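
/* Each RAM page has one byte of dirty flags in ram_list.phys_dirty.  Separate
 * bits serve different clients (VGA updates, TB/code invalidation, migration);
 * 0xff means "dirty for every client", which is why freshly allocated blocks
 * are marked with 0xff in qemu_ram_alloc_from_ptr() below.
 */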
697

    
698
static int cpu_physical_memory_set_dirty_tracking(int enable)
699
{
700
    int ret = 0;
701
    in_migration = enable;
702
    return ret;
703
}
704

    
705
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
706
                                       MemoryRegionSection *section,
707
                                       target_ulong vaddr,
708
                                       hwaddr paddr, hwaddr xlat,
709
                                       int prot,
710
                                       target_ulong *address)
711
{
712
    hwaddr iotlb;
713
    CPUWatchpoint *wp;
714

    
715
    if (memory_region_is_ram(section->mr)) {
716
        /* Normal RAM.  */
717
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
718
            + xlat;
719
        if (!section->readonly) {
720
            iotlb |= phys_section_notdirty;
721
        } else {
722
            iotlb |= phys_section_rom;
723
        }
724
    } else {
725
        iotlb = section - phys_sections;
726
        iotlb += xlat;
727
    }
728

    
729
    /* Make accesses to pages with watchpoints go via the
730
       watchpoint trap routines.  */
731
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
732
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
733
            /* Avoid trapping reads of pages with a write breakpoint. */
734
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
735
                iotlb = phys_section_watch + paddr;
736
                *address |= TLB_MMIO;
737
                break;
738
            }
739
        }
740
    }
741

    
742
    return iotlb;
743
}
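
/* The returned iotlb value is an encoding, not a plain address: for RAM it is
 * the page's ram_addr OR'ed with the notdirty/rom section number (section
 * numbers are kept below TARGET_PAGE_SIZE precisely so they fit in the
 * in-page bits), while for MMIO it is the phys_sections index of the section
 * plus the offset into it.  iotlb_to_region() below recovers the section part.
 */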
744
#endif /* !defined(CONFIG_USER_ONLY) */
745

    
746
#if !defined(CONFIG_USER_ONLY)
747

    
748
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
749
                             uint16_t section);
750
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
751

    
752
static uint16_t phys_section_add(MemoryRegionSection *section)
753
{
754
    /* The physical section number is ORed with a page-aligned
755
     * pointer to produce the iotlb entries.  Thus it should
756
     * never overflow into the page-aligned value.
757
     */
758
    assert(phys_sections_nb < TARGET_PAGE_SIZE);
759

    
760
    if (phys_sections_nb == phys_sections_nb_alloc) {
761
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
762
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
763
                                phys_sections_nb_alloc);
764
    }
765
    phys_sections[phys_sections_nb] = *section;
766
    memory_region_ref(section->mr);
767
    return phys_sections_nb++;
768
}
769

    
770
static void phys_section_destroy(MemoryRegion *mr)
771
{
772
    memory_region_unref(mr);
773

    
774
    if (mr->subpage) {
775
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
776
        memory_region_destroy(&subpage->iomem);
777
        g_free(subpage);
778
    }
779
}
780

    
781
static void phys_sections_clear(void)
782
{
783
    while (phys_sections_nb > 0) {
784
        MemoryRegionSection *section = &phys_sections[--phys_sections_nb];
785
        phys_section_destroy(section->mr);
786
    }
787
    phys_map_nodes_nb = 0;
788
}
789

    
790
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
791
{
792
    subpage_t *subpage;
793
    hwaddr base = section->offset_within_address_space
794
        & TARGET_PAGE_MASK;
795
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
796
    MemoryRegionSection subsection = {
797
        .offset_within_address_space = base,
798
        .size = int128_make64(TARGET_PAGE_SIZE),
799
    };
800
    hwaddr start, end;
801

    
802
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
803

    
804
    if (!(existing->mr->subpage)) {
805
        subpage = subpage_init(d->as, base);
806
        subsection.mr = &subpage->iomem;
807
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
808
                      phys_section_add(&subsection));
809
    } else {
810
        subpage = container_of(existing->mr, subpage_t, iomem);
811
    }
812
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
813
    end = start + int128_get64(section->size) - 1;
814
    subpage_register(subpage, start, end, phys_section_add(section));
815
}
816

    
817

    
818
static void register_multipage(AddressSpaceDispatch *d,
819
                               MemoryRegionSection *section)
820
{
821
    hwaddr start_addr = section->offset_within_address_space;
822
    uint16_t section_index = phys_section_add(section);
823
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
824
                                                    TARGET_PAGE_BITS));
825

    
826
    assert(num_pages);
827
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
828
}
829

    
830
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
831
{
832
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
833
    MemoryRegionSection now = *section, remain = *section;
834
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
835

    
836
    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
837
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
838
                       - now.offset_within_address_space;
839

    
840
        now.size = int128_min(int128_make64(left), now.size);
841
        register_subpage(d, &now);
842
    } else {
843
        now.size = int128_zero();
844
    }
845
    while (int128_ne(remain.size, now.size)) {
846
        remain.size = int128_sub(remain.size, now.size);
847
        remain.offset_within_address_space += int128_get64(now.size);
848
        remain.offset_within_region += int128_get64(now.size);
849
        now = remain;
850
        if (int128_lt(remain.size, page_size)) {
851
            register_subpage(d, &now);
852
        } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
853
            now.size = page_size;
854
            register_subpage(d, &now);
855
        } else {
856
            now.size = int128_and(now.size, int128_neg(page_size));
857
            register_multipage(d, &now);
858
        }
859
    }
860
}
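
/* mem_add() splits a section into at most three kinds of pieces: an unaligned
 * head and an unaligned tail, registered as subpages so they can share their
 * target page with neighbouring sections, and a page-aligned middle registered
 * wholesale via register_multipage().  Sketch with 4K pages: a section
 * covering [0x1800, 0x5400) becomes subpage [0x1800, 0x2000), full pages
 * [0x2000, 0x5000), and subpage [0x5000, 0x5400), assuming the offsets within
 * the region stay page-aligned.
 */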
861

    
862
void qemu_flush_coalesced_mmio_buffer(void)
863
{
864
    if (kvm_enabled())
865
        kvm_flush_coalesced_mmio_buffer();
866
}
867

    
868
void qemu_mutex_lock_ramlist(void)
869
{
870
    qemu_mutex_lock(&ram_list.mutex);
871
}
872

    
873
void qemu_mutex_unlock_ramlist(void)
874
{
875
    qemu_mutex_unlock(&ram_list.mutex);
876
}
877

    
878
#if defined(__linux__) && !defined(TARGET_S390X)
879

    
880
#include <sys/vfs.h>
881

    
882
#define HUGETLBFS_MAGIC       0x958458f6
883

    
884
static long gethugepagesize(const char *path)
885
{
886
    struct statfs fs;
887
    int ret;
888

    
889
    do {
890
        ret = statfs(path, &fs);
891
    } while (ret != 0 && errno == EINTR);
892

    
893
    if (ret != 0) {
894
        perror(path);
895
        return 0;
896
    }
897

    
898
    if (fs.f_type != HUGETLBFS_MAGIC)
899
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
900

    
901
    return fs.f_bsize;
902
}
903

    
904
static void *file_ram_alloc(RAMBlock *block,
905
                            ram_addr_t memory,
906
                            const char *path)
907
{
908
    char *filename;
909
    char *sanitized_name;
910
    char *c;
911
    void *area;
912
    int fd;
913
#ifdef MAP_POPULATE
914
    int flags;
915
#endif
916
    unsigned long hpagesize;
917

    
918
    hpagesize = gethugepagesize(path);
919
    if (!hpagesize) {
920
        return NULL;
921
    }
922

    
923
    if (memory < hpagesize) {
924
        return NULL;
925
    }
926

    
927
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
928
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
929
        return NULL;
930
    }
931

    
932
    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
933
    sanitized_name = g_strdup(block->mr->name);
934
    for (c = sanitized_name; *c != '\0'; c++) {
935
        if (*c == '/')
936
            *c = '_';
937
    }
938

    
939
    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
940
                               sanitized_name);
941
    g_free(sanitized_name);
942

    
943
    fd = mkstemp(filename);
944
    if (fd < 0) {
945
        perror("unable to create backing store for hugepages");
946
        g_free(filename);
947
        return NULL;
948
    }
949
    unlink(filename);
950
    g_free(filename);
951

    
952
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
953

    
954
    /*
955
     * ftruncate is not supported by hugetlbfs in older
956
     * hosts, so don't bother bailing out on errors.
957
     * If anything goes wrong with it under other filesystems,
958
     * mmap will fail.
959
     */
960
    if (ftruncate(fd, memory))
961
        perror("ftruncate");
962

    
963
#ifdef MAP_POPULATE
964
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
965
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
966
     * to sidestep this quirk.
967
     */
968
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
969
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
970
#else
971
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
972
#endif
973
    if (area == MAP_FAILED) {
974
        perror("file_ram_alloc: can't mmap RAM pages");
975
        close(fd);
976
        return (NULL);
977
    }
978
    block->fd = fd;
979
    return area;
980
}
981
#endif
982

    
983
static ram_addr_t find_ram_offset(ram_addr_t size)
984
{
985
    RAMBlock *block, *next_block;
986
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
987

    
988
    assert(size != 0); /* it would hand out the same offset multiple times */
989

    
990
    if (QTAILQ_EMPTY(&ram_list.blocks))
991
        return 0;
992

    
993
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
994
        ram_addr_t end, next = RAM_ADDR_MAX;
995

    
996
        end = block->offset + block->length;
997

    
998
        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
999
            if (next_block->offset >= end) {
1000
                next = MIN(next, next_block->offset);
1001
            }
1002
        }
1003
        if (next - end >= size && next - end < mingap) {
1004
            offset = end;
1005
            mingap = next - end;
1006
        }
1007
    }
1008

    
1009
    if (offset == RAM_ADDR_MAX) {
1010
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1011
                (uint64_t)size);
1012
        abort();
1013
    }
1014

    
1015
    return offset;
1016
}
1017

    
1018
ram_addr_t last_ram_offset(void)
1019
{
1020
    RAMBlock *block;
1021
    ram_addr_t last = 0;
1022

    
1023
    QTAILQ_FOREACH(block, &ram_list.blocks, next)
1024
        last = MAX(last, block->offset + block->length);
1025

    
1026
    return last;
1027
}
1028

    
1029
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1030
{
1031
    int ret;
1032
    QemuOpts *machine_opts;
1033

    
1034
    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1035
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1036
    if (machine_opts &&
1037
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1038
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1039
        if (ret) {
1040
            perror("qemu_madvise");
1041
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1042
                            "but dump_guest_core=off specified\n");
1043
        }
1044
    }
1045
}
1046

    
1047
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1048
{
1049
    RAMBlock *new_block, *block;
1050

    
1051
    new_block = NULL;
1052
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1053
        if (block->offset == addr) {
1054
            new_block = block;
1055
            break;
1056
        }
1057
    }
1058
    assert(new_block);
1059
    assert(!new_block->idstr[0]);
1060

    
1061
    if (dev) {
1062
        char *id = qdev_get_dev_path(dev);
1063
        if (id) {
1064
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1065
            g_free(id);
1066
        }
1067
    }
1068
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1069

    
1070
    /* This assumes the iothread lock is taken here too.  */
1071
    qemu_mutex_lock_ramlist();
1072
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1073
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1074
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1075
                    new_block->idstr);
1076
            abort();
1077
        }
1078
    }
1079
    qemu_mutex_unlock_ramlist();
1080
}
1081

    
1082
static int memory_try_enable_merging(void *addr, size_t len)
1083
{
1084
    QemuOpts *opts;
1085

    
1086
    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1087
    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1088
        /* disabled by the user */
1089
        return 0;
1090
    }
1091

    
1092
    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1093
}
1094

    
1095
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1096
                                   MemoryRegion *mr)
1097
{
1098
    RAMBlock *block, *new_block;
1099

    
1100
    size = TARGET_PAGE_ALIGN(size);
1101
    new_block = g_malloc0(sizeof(*new_block));
1102

    
1103
    /* This assumes the iothread lock is taken here too.  */
1104
    qemu_mutex_lock_ramlist();
1105
    new_block->mr = mr;
1106
    new_block->offset = find_ram_offset(size);
1107
    if (host) {
1108
        new_block->host = host;
1109
        new_block->flags |= RAM_PREALLOC_MASK;
1110
    } else {
1111
        if (mem_path) {
1112
#if defined (__linux__) && !defined(TARGET_S390X)
1113
            new_block->host = file_ram_alloc(new_block, size, mem_path);
1114
            if (!new_block->host) {
1115
                new_block->host = qemu_anon_ram_alloc(size);
1116
                memory_try_enable_merging(new_block->host, size);
1117
            }
1118
#else
1119
            fprintf(stderr, "-mem-path option unsupported\n");
1120
            exit(1);
1121
#endif
1122
        } else {
1123
            if (xen_enabled()) {
1124
                xen_ram_alloc(new_block->offset, size, mr);
1125
            } else if (kvm_enabled()) {
1126
                /* some s390/kvm configurations have special constraints */
1127
                new_block->host = kvm_ram_alloc(size);
1128
            } else {
1129
                new_block->host = qemu_anon_ram_alloc(size);
1130
            }
1131
            memory_try_enable_merging(new_block->host, size);
1132
        }
1133
    }
1134
    new_block->length = size;
1135

    
1136
    /* Keep the list sorted from biggest to smallest block.  */
1137
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1138
        if (block->length < new_block->length) {
1139
            break;
1140
        }
1141
    }
1142
    if (block) {
1143
        QTAILQ_INSERT_BEFORE(block, new_block, next);
1144
    } else {
1145
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1146
    }
1147
    ram_list.mru_block = NULL;
1148

    
1149
    ram_list.version++;
1150
    qemu_mutex_unlock_ramlist();
1151

    
1152
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1153
                                       last_ram_offset() >> TARGET_PAGE_BITS);
1154
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1155
           0, size >> TARGET_PAGE_BITS);
1156
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1157

    
1158
    qemu_ram_setup_dump(new_block->host, size);
1159
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1160

    
1161
    if (kvm_enabled())
1162
        kvm_setup_guest_memory(new_block->host, size);
1163

    
1164
    return new_block->offset;
1165
}
1166

    
1167
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1168
{
1169
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
1170
}
1171

    
1172
void qemu_ram_free_from_ptr(ram_addr_t addr)
1173
{
1174
    RAMBlock *block;
1175

    
1176
    /* This assumes the iothread lock is taken here too.  */
1177
    qemu_mutex_lock_ramlist();
1178
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1179
        if (addr == block->offset) {
1180
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1181
            ram_list.mru_block = NULL;
1182
            ram_list.version++;
1183
            g_free(block);
1184
            break;
1185
        }
1186
    }
1187
    qemu_mutex_unlock_ramlist();
1188
}
1189

    
1190
void qemu_ram_free(ram_addr_t addr)
1191
{
1192
    RAMBlock *block;
1193

    
1194
    /* This assumes the iothread lock is taken here too.  */
1195
    qemu_mutex_lock_ramlist();
1196
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1197
        if (addr == block->offset) {
1198
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
1199
            ram_list.mru_block = NULL;
1200
            ram_list.version++;
1201
            if (block->flags & RAM_PREALLOC_MASK) {
1202
                ;
1203
            } else if (mem_path) {
1204
#if defined (__linux__) && !defined(TARGET_S390X)
1205
                if (block->fd) {
1206
                    munmap(block->host, block->length);
1207
                    close(block->fd);
1208
                } else {
1209
                    qemu_anon_ram_free(block->host, block->length);
1210
                }
1211
#else
1212
                abort();
1213
#endif
1214
            } else {
1215
                if (xen_enabled()) {
1216
                    xen_invalidate_map_cache_entry(block->host);
1217
                } else {
1218
                    qemu_anon_ram_free(block->host, block->length);
1219
                }
1220
            }
1221
            g_free(block);
1222
            break;
1223
        }
1224
    }
1225
    qemu_mutex_unlock_ramlist();
1226

    
1227
}
1228

    
1229
#ifndef _WIN32
1230
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1231
{
1232
    RAMBlock *block;
1233
    ram_addr_t offset;
1234
    int flags;
1235
    void *area, *vaddr;
1236

    
1237
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1238
        offset = addr - block->offset;
1239
        if (offset < block->length) {
1240
            vaddr = block->host + offset;
1241
            if (block->flags & RAM_PREALLOC_MASK) {
1242
                ;
1243
            } else {
1244
                flags = MAP_FIXED;
1245
                munmap(vaddr, length);
1246
                if (mem_path) {
1247
#if defined(__linux__) && !defined(TARGET_S390X)
1248
                    if (block->fd) {
1249
#ifdef MAP_POPULATE
1250
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1251
                            MAP_PRIVATE;
1252
#else
1253
                        flags |= MAP_PRIVATE;
1254
#endif
1255
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1256
                                    flags, block->fd, offset);
1257
                    } else {
1258
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1259
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1260
                                    flags, -1, 0);
1261
                    }
1262
#else
1263
                    abort();
1264
#endif
1265
                } else {
1266
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
1267
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
1268
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1269
                                flags, -1, 0);
1270
#else
1271
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1272
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1273
                                flags, -1, 0);
1274
#endif
1275
                }
1276
                if (area != vaddr) {
1277
                    fprintf(stderr, "Could not remap addr: "
1278
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1279
                            length, addr);
1280
                    exit(1);
1281
                }
1282
                memory_try_enable_merging(vaddr, length);
1283
                qemu_ram_setup_dump(vaddr, length);
1284
            }
1285
            return;
1286
        }
1287
    }
1288
}
1289
#endif /* !_WIN32 */
1290

    
1291
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
1292
{
1293
    RAMBlock *block;
1294

    
1295
    /* The list is protected by the iothread lock here.  */
1296
    block = ram_list.mru_block;
1297
    if (block && addr - block->offset < block->length) {
1298
        goto found;
1299
    }
1300
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1301
        if (addr - block->offset < block->length) {
1302
            goto found;
1303
        }
1304
    }
1305

    
1306
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1307
    abort();
1308

    
1309
found:
1310
    ram_list.mru_block = block;
1311
    return block;
1312
}
1313

    
1314
/* Return a host pointer to ram allocated with qemu_ram_alloc.
1315
   With the exception of the softmmu code in this file, this should
1316
   only be used for local memory (e.g. video ram) that the device owns,
1317
   and knows it isn't going to access beyond the end of the block.
1318

1319
   It should not be used for general purpose DMA.
1320
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1321
 */
1322
void *qemu_get_ram_ptr(ram_addr_t addr)
1323
{
1324
    RAMBlock *block = qemu_get_ram_block(addr);
1325

    
1326
    if (xen_enabled()) {
1327
        /* We need to check if the requested address is in the RAM
1328
         * because we don't want to map the entire memory in QEMU.
1329
         * In that case just map until the end of the page.
1330
         */
1331
        if (block->offset == 0) {
1332
            return xen_map_cache(addr, 0, 0);
1333
        } else if (block->host == NULL) {
1334
            block->host =
1335
                xen_map_cache(block->offset, block->length, 1);
1336
        }
1337
    }
1338
    return block->host + (addr - block->offset);
1339
}
1340

    
1341
/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1342
 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1343
 *
1344
 * ??? Is this still necessary?
1345
 */
1346
static void *qemu_safe_ram_ptr(ram_addr_t addr)
1347
{
1348
    RAMBlock *block;
1349

    
1350
    /* The list is protected by the iothread lock here.  */
1351
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1352
        if (addr - block->offset < block->length) {
1353
            if (xen_enabled()) {
1354
                /* We need to check if the requested address is in the RAM
1355
                 * because we don't want to map the entire memory in QEMU.
1356
                 * In that case just map until the end of the page.
1357
                 */
1358
                if (block->offset == 0) {
1359
                    return xen_map_cache(addr, 0, 0);
1360
                } else if (block->host == NULL) {
1361
                    block->host =
1362
                        xen_map_cache(block->offset, block->length, 1);
1363
                }
1364
            }
1365
            return block->host + (addr - block->offset);
1366
        }
1367
    }
1368

    
1369
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1370
    abort();
1371

    
1372
    return NULL;
1373
}
1374

    
1375
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1376
 * but takes a size argument */
1377
static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1378
{
1379
    if (*size == 0) {
1380
        return NULL;
1381
    }
1382
    if (xen_enabled()) {
1383
        return xen_map_cache(addr, *size, 1);
1384
    } else {
1385
        RAMBlock *block;
1386

    
1387
        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1388
            if (addr - block->offset < block->length) {
1389
                if (addr - block->offset + *size > block->length)
1390
                    *size = block->length - addr + block->offset;
1391
                return block->host + (addr - block->offset);
1392
            }
1393
        }
1394

    
1395
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1396
        abort();
1397
    }
1398
}
1399

    
1400
/* Some of the softmmu routines need to translate from a host pointer
1401
   (typically a TLB entry) back to a ram offset.  */
1402
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1403
{
1404
    RAMBlock *block;
1405
    uint8_t *host = ptr;
1406

    
1407
    if (xen_enabled()) {
1408
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
1409
        return qemu_get_ram_block(*ram_addr)->mr;
1410
    }
1411

    
1412
    block = ram_list.mru_block;
1413
    if (block && block->host && host - block->host < block->length) {
1414
        goto found;
1415
    }
1416

    
1417
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1418
        /* This case can happen when the block is not mapped. */
1419
        if (block->host == NULL) {
1420
            continue;
1421
        }
1422
        if (host - block->host < block->length) {
1423
            goto found;
1424
        }
1425
    }
1426

    
1427
    return NULL;
1428

    
1429
found:
1430
    *ram_addr = block->offset + (host - block->host);
1431
    return block->mr;
1432
}
1433

    
1434
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1435
                               uint64_t val, unsigned size)
1436
{
1437
    int dirty_flags;
1438
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1439
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1440
        tb_invalidate_phys_page_fast(ram_addr, size);
1441
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1442
    }
1443
    switch (size) {
1444
    case 1:
1445
        stb_p(qemu_get_ram_ptr(ram_addr), val);
1446
        break;
1447
    case 2:
1448
        stw_p(qemu_get_ram_ptr(ram_addr), val);
1449
        break;
1450
    case 4:
1451
        stl_p(qemu_get_ram_ptr(ram_addr), val);
1452
        break;
1453
    default:
1454
        abort();
1455
    }
1456
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1457
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1458
    /* we remove the notdirty callback only if the code has been
1459
       flushed */
1460
    if (dirty_flags == 0xff)
1461
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1462
}
1463

    
1464
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1465
                                 unsigned size, bool is_write)
1466
{
1467
    return is_write;
1468
}
1469

    
1470
static const MemoryRegionOps notdirty_mem_ops = {
1471
    .write = notdirty_mem_write,
1472
    .valid.accepts = notdirty_mem_accepts,
1473
    .endianness = DEVICE_NATIVE_ENDIAN,
1474
};
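
/* The "notdirty" region backs TLB entries for RAM pages that still contain
 * translated code (CODE_DIRTY_FLAG clear).  Writes are trapped here so the
 * overlapping TBs can be invalidated before the store is performed; once the
 * page is fully dirty again (0xff), the TLB entry is switched back to a plain
 * RAM mapping via tlb_set_dirty().
 */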
1475

    
1476
/* Generate a debug exception if a watchpoint has been hit.  */
1477
static void check_watchpoint(int offset, int len_mask, int flags)
1478
{
1479
    CPUArchState *env = cpu_single_env;
1480
    target_ulong pc, cs_base;
1481
    target_ulong vaddr;
1482
    CPUWatchpoint *wp;
1483
    int cpu_flags;
1484

    
1485
    if (env->watchpoint_hit) {
1486
        /* We re-entered the check after replacing the TB. Now raise
1487
         * the debug interrupt so that it will trigger after the
1488
         * current instruction. */
1489
        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1490
        return;
1491
    }
1492
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1493
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1494
        if ((vaddr == (wp->vaddr & len_mask) ||
1495
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1496
            wp->flags |= BP_WATCHPOINT_HIT;
1497
            if (!env->watchpoint_hit) {
1498
                env->watchpoint_hit = wp;
1499
                tb_check_watchpoint(env);
1500
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1501
                    env->exception_index = EXCP_DEBUG;
1502
                    cpu_loop_exit(env);
1503
                } else {
1504
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1505
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1506
                    cpu_resume_from_signal(env, NULL);
1507
                }
1508
            }
1509
        } else {
1510
            wp->flags &= ~BP_WATCHPOINT_HIT;
1511
        }
1512
    }
1513
}
1514

    
1515
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1516
   so these check for a hit then pass through to the normal out-of-line
1517
   phys routines.  */
1518
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1519
                               unsigned size)
1520
{
1521
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1522
    switch (size) {
1523
    case 1: return ldub_phys(addr);
1524
    case 2: return lduw_phys(addr);
1525
    case 4: return ldl_phys(addr);
1526
    default: abort();
1527
    }
1528
}
1529

    
1530
static void watch_mem_write(void *opaque, hwaddr addr,
1531
                            uint64_t val, unsigned size)
1532
{
1533
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1534
    switch (size) {
1535
    case 1:
1536
        stb_phys(addr, val);
1537
        break;
1538
    case 2:
1539
        stw_phys(addr, val);
1540
        break;
1541
    case 4:
1542
        stl_phys(addr, val);
1543
        break;
1544
    default: abort();
1545
    }
1546
}
1547

    
1548
static const MemoryRegionOps watch_mem_ops = {
1549
    .read = watch_mem_read,
1550
    .write = watch_mem_write,
1551
    .endianness = DEVICE_NATIVE_ENDIAN,
1552
};
1553

    
1554
static uint64_t subpage_read(void *opaque, hwaddr addr,
1555
                             unsigned len)
1556
{
1557
    subpage_t *subpage = opaque;
1558
    uint8_t buf[4];
1559

    
1560
#if defined(DEBUG_SUBPAGE)
1561
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
1562
           subpage, len, addr);
1563
#endif
1564
    address_space_read(subpage->as, addr + subpage->base, buf, len);
1565
    switch (len) {
1566
    case 1:
1567
        return ldub_p(buf);
1568
    case 2:
1569
        return lduw_p(buf);
1570
    case 4:
1571
        return ldl_p(buf);
1572
    default:
1573
        abort();
1574
    }
1575
}
1576

    
1577
static void subpage_write(void *opaque, hwaddr addr,
1578
                          uint64_t value, unsigned len)
1579
{
1580
    subpage_t *subpage = opaque;
1581
    uint8_t buf[4];
1582

    
1583
#if defined(DEBUG_SUBPAGE)
1584
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1585
           " value %"PRIx64"\n",
1586
           __func__, subpage, len, addr, value);
1587
#endif
1588
    switch (len) {
1589
    case 1:
1590
        stb_p(buf, value);
1591
        break;
1592
    case 2:
1593
        stw_p(buf, value);
1594
        break;
1595
    case 4:
1596
        stl_p(buf, value);
1597
        break;
1598
    default:
1599
        abort();
1600
    }
1601
    address_space_write(subpage->as, addr + subpage->base, buf, len);
1602
}
1603

    
1604
static bool subpage_accepts(void *opaque, hwaddr addr,
1605
                            unsigned size, bool is_write)
1606
{
1607
    subpage_t *subpage = opaque;
1608
#if defined(DEBUG_SUBPAGE)
1609
    printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx "\n",
1610
           __func__, subpage, is_write ? 'w' : 'r', size, addr);
1611
#endif
1612

    
1613
    return address_space_access_valid(subpage->as, addr + subpage->base,
1614
                                      size, is_write);
1615
}
1616

    
1617
static const MemoryRegionOps subpage_ops = {
1618
    .read = subpage_read,
1619
    .write = subpage_write,
1620
    .valid.accepts = subpage_accepts,
1621
    .endianness = DEVICE_NATIVE_ENDIAN,
1622
};
1623

    
1624
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1625
                             uint16_t section)
1626
{
1627
    int idx, eidx;
1628

    
1629
    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1630
        return -1;
1631
    idx = SUBPAGE_IDX(start);
1632
    eidx = SUBPAGE_IDX(end);
1633
#if defined(DEBUG_SUBPAGE)
1634
    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1635
           mmio, start, end, idx, eidx, section);
1636
#endif
1637
    for (; idx <= eidx; idx++) {
1638
        mmio->sub_section[idx] = section;
1639
    }
1640

    
1641
    return 0;
1642
}
1643

    
1644
static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1645
{
1646
    subpage_t *mmio;
1647

    
1648
    mmio = g_malloc0(sizeof(subpage_t));
1649

    
1650
    mmio->as = as;
1651
    mmio->base = base;
1652
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1653
                          "subpage", TARGET_PAGE_SIZE);
1654
    mmio->iomem.subpage = true;
1655
#if defined(DEBUG_SUBPAGE)
1656
    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1657
           mmio, base, TARGET_PAGE_SIZE);
1658
#endif
1659
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1660

    
1661
    return mmio;
1662
}
1663

    
1664
static uint16_t dummy_section(MemoryRegion *mr)
1665
{
1666
    MemoryRegionSection section = {
1667
        .mr = mr,
1668
        .offset_within_address_space = 0,
1669
        .offset_within_region = 0,
1670
        .size = int128_2_64(),
1671
    };
1672

    
1673
    return phys_section_add(&section);
1674
}
1675

    
1676
MemoryRegion *iotlb_to_region(hwaddr index)
1677
{
1678
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1679
}
1680

    
1681
static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);

    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
}

static void core_begin(MemoryListener *listener)
{
    phys_sections_clear();
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
    phys_section_rom = dummy_section(&io_mem_rom);
    phys_section_watch = dummy_section(&io_mem_watch);
}

static void tcg_commit(MemoryListener *listener)
{
    CPUArchState *env;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

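/* Each AddressSpace carries its own dispatch state: the listener below
 * rebuilds the flattened phys_map on every topology change (mem_begin()
 * clears it, mem_add() repopulates it from the new sections). */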
void address_space_init_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
    d->listener = (MemoryListener) {
        .begin = mem_begin,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    d->as = as;
    as->dispatch = d;
    memory_listener_register(&d->listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&d->listener);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, NULL, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, NULL, "io", 65536);
    address_space_init(&address_space_io, system_io, "I/O");

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

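/* Pick the widest access (4, 2 or 1 bytes) that the remaining length and
 * the address alignment allow; regions declaring impl.unaligned are not
 * forced down to narrower accesses. */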
static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
{
    if (l >= 4 && (((addr & 3) == 0 || mr->ops->impl.unaligned))) {
        return 4;
    }
    if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
        return 2;
    }
    return 1;
}

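/* Slow-path transfer between a host buffer and the address space: the
 * copy is split at MemoryRegion boundaries, RAM and ROMD regions are
 * accessed directly through their host mapping, everything else goes
 * through io_mem_read()/io_mem_write().  Returns true if any of the
 * component accesses failed. */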
bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l == 4) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                } else if (l == 2) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                if (l == 4) {
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                } else if (l == 2) {
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                } else {
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}

bool address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    return address_space_rw(as, addr, buf, len, false);
}


void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    while (len > 0) {
        l = len;
        mr = address_space_translate(&address_space_memory,
                                     addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

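/* Fallback used by address_space_map() when the target is not directly
 * accessible RAM: data is staged through a single statically allocated
 * bounce buffer, so only one such mapping can be outstanding at a time. */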
typedef struct {
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

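/* Register a callback that fires once the bounce buffer is released,
 * i.e. when retrying a failed address_space_map() is likely to succeed;
 * the registration is removed after the callback has been invoked. */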
void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (bounce.buffer) {
            return NULL;
        }
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
        bounce.addr = addr;
        bounce.len = l;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1;
            MemoryRegion *mr = qemu_ram_addr_from_host(buffer, &addr1);
            assert(mr != NULL);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}

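/* Illustrative sketch of the map/unmap pairing (not code from this file;
 * 'as', 'gpa', 'data' and 'size' stand for the caller's address space,
 * guest address, source buffer and requested length):
 *
 *     hwaddr maplen = size;
 *     uint8_t *p = address_space_map(as, gpa, &maplen, true);
 *     if (p) {
 *         memcpy(p, data, maplen);
 *         address_space_unmap(as, p, maplen, true, maplen);
 *     }
 *
 * The mapping may cover less than 'size'; callers must loop or fall back
 * to address_space_rw() for the remainder.
 */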
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

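/* The ld*_phys()/st*_phys() helpers below are fixed-width accessors on the
 * system memory address space; the _internal variants take the requested
 * device endianness and byte-swap I/O results when it differs from the
 * target's byte order. */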
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif

#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}

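/* Invoke func for every RAM block, passing its host address, its guest
 * ram_addr offset and its length. */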
void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->length, opaque);
    }
}
#endif