/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "osdep.h"
#include "kvm.h"
#include "hw/xen.h"
#include "qemu-timer.h"
#include "memory.h"
#include "exec-memory.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/time.h>
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else /* !CONFIG_USER_ONLY */
#include "xen-mapcache.h"
#include "trace.h"
#endif

#include "cputlb.h"

#define WANT_EXEC_OBSOLETE
#include "exec-obsolete.h"

//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
//#define DEBUG_UNASSIGNED

/* make various TB consistency checks */
//#define DEBUG_TB_CHECK

//#define DEBUG_IOPORT
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation.  */
#undef DEBUG_TB_CHECK
#endif

#define SMC_BITMAP_USE_THRESHOLD 10

static TranslationBlock *tbs;
static int code_gen_max_blocks;
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;

#if defined(__arm__) || defined(__sparc__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
 have limited branch ranges (possibly also PPC) so place it in a
 section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32) && !defined(_WIN64)
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif

uint8_t code_gen_prologue[1024] code_gen_section;
static uint8_t *code_gen_buffer;
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
static uint8_t *code_gen_ptr;

#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
static MemoryRegion io_mem_subpage_ram;

#endif

CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount = 0;

typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap */
    unsigned int code_write_count;
    uint8_t *code_bitmap;
#if defined(CONFIG_USER_ONLY)
    unsigned long flags;
#endif
} PageDesc;

/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses.  */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)

#define P_L2_LEVELS \
    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)

/* The bits remaining after N lower levels of page tables.  */
#define V_L1_BITS_REM \
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)

#if V_L1_BITS_REM < 4
#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS  V_L1_BITS_REM
#endif

#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)

#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)

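/* Note: a page index is resolved by using its top V_L1_BITS as an index
   into the statically allocated l1_map, then walking (V_L1_SHIFT / L2_BITS)
   further levels of tables, each indexed by L2_BITS bits, down to a leaf
   array of PageDesc:

       l1_map[(index >> V_L1_SHIFT) & (V_L1_SIZE - 1)]
           -> intermediate void * tables
               -> PageDesc[L2_SIZE] leaf

   page_find_alloc() below performs exactly this walk. */
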
uintptr_t qemu_real_host_page_size;
uintptr_t qemu_host_page_size;
uintptr_t qemu_host_page_mask;

/* This is a multi-level map on the virtual address space.
   The bottom level has pointers to PageDesc.  */
static void *l1_map[V_L1_SIZE];

#if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

static MemoryRegionSection *phys_sections;
static unsigned phys_sections_nb, phys_sections_nb_alloc;
static uint16_t phys_section_unassigned;
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

struct PhysPageEntry {
    uint16_t is_leaf : 1;
    /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

/* This is a multi-level map on the physical address space.
   The bottom level has pointers to MemoryRegionSections.  */
static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };

static void io_mem_init(void);
static void memory_map_init(void);

static MemoryRegion io_mem_watch;
#endif

/* statistics */
static int tb_flush_count;
static int tb_phys_invalidate_count;

#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD old_protect;
    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);

}
#else
static void map_exec(void *addr, long size)
{
    unsigned long start, end, page_size;

    page_size = getpagesize();
    start = (unsigned long)addr;
    start &= ~(page_size - 1);

    end = (unsigned long)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    mprotect((void *)start, end - start,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif

static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}

static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

#if defined(CONFIG_USER_ONLY)
    /* We can't use g_malloc because it may recurse into a locked mutex. */
# define ALLOC(P, SIZE)                                 \
    do {                                                \
        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
    } while (0)
#else
# define ALLOC(P, SIZE) \
    do { P = g_malloc0(SIZE); } while (0)
#endif

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));

    /* Level 2..N-1.  */
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        void **p = *lp;

        if (p == NULL) {
            if (!alloc) {
                return NULL;
            }
            ALLOC(p, sizeof(void *) * L2_SIZE);
            *lp = p;
        }

        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }

    pd = *lp;
    if (pd == NULL) {
        if (!alloc) {
            return NULL;
        }
        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
        *lp = pd;
    }

#undef ALLOC

    return pd + (index & (L2_SIZE - 1));
}

static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}

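/* Typical use: callers pass a page index (address >> TARGET_PAGE_BITS).
   page_find() returns NULL for pages that have never been touched, while
   page_find_alloc(index, 1) creates the missing intermediate tables and
   the leaf PageDesc array on demand. */
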
#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(unsigned nodes)
{
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
        typedef PhysPageEntry Node[L2_SIZE];
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
                                      phys_map_nodes_nb + nodes);
        phys_map_nodes = g_renew(Node, phys_map_nodes,
                                 phys_map_nodes_nb_alloc);
    }
}

static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = phys_map_nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != phys_map_nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        phys_map_nodes[ret][i].is_leaf = 0;
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}


static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
                                target_phys_addr_t *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

MemoryRegionSection *phys_page_find(target_phys_addr_t index)
{
    PhysPageEntry lp = phys_map;
    PhysPageEntry *p;
    int i;
    uint16_t s_index = phys_section_unassigned;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            goto not_found;
        }
        p = phys_map_nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }

    s_index = lp.ptr;
not_found:
    return &phys_sections[s_index];
}

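/* Note: phys_page_find() never returns NULL.  When the walk reaches an
   unpopulated node it falls through with s_index left at
   phys_section_unassigned, so callers always get a valid
   MemoryRegionSection and can test it with memory_region_is_unassigned(). */
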
bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_ram && mr != &io_mem_rom
        && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

#define mmap_lock() do { } while(0)
#define mmap_unlock() do { } while(0)
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc is used. */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif

static void code_gen_alloc(unsigned long tb_size)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc__) && HOST_LONG_BITS == 64
        // Map the buffer below 2G, so we can use direct calls and branches
        start = (void *) 0x40000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Keep the buffer no bigger than 16MB to branch between blocks */
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        start = (void *)0x90000000UL;
#endif
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc__) && HOST_LONG_BITS == 64
        // Map the buffer below 2G, so we can use direct calls and branches
        addr = (void *) 0x40000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}

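/* Note: code_gen_buffer_max_size is kept smaller than the buffer itself by
   TCG_MAX_OP_SIZE * OPC_BUF_SIZE, leaving headroom so that a translation
   started just below the threshold still fits in the buffer; once the
   threshold is crossed tb_alloc() returns NULL and tb_gen_code() forces a
   tb_flush(). */
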
/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    code_gen_alloc(tb_size);
    code_gen_ptr = code_gen_buffer;
    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now.  */
    tcg_prologue_init(&tcg_ctx);
#endif
}

bool tcg_enabled(void)
{
    return code_gen_buffer != NULL;
}

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}

#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}

static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
#endif

CPUArchState *qemu_get_cpu(int cpu)
{
    CPUArchState *env = first_cpu;

    while (env) {
        if (env->cpu_index == cpu)
            break;
        env = env->next_cpu;
    }

    return env;
}

void cpu_exec_init(CPUArchState *env)
{
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    env->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}

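/* Note: cpu_index is simply the position of the new CPU in the first_cpu
   list, assigned in registration order; the same value is used as the
   instance id for the vmstate/savevm registration above. */
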
/* Allocate a new translation block. Flush the translation buffer if
   too many translation blocks or too much generated code. */
static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    if (nb_tbs >= code_gen_max_blocks ||
        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
        return NULL;
    tb = &tbs[nb_tbs++];
    tb->pc = pc;
    tb->cflags = 0;
    return tb;
}

void tb_free(TranslationBlock *tb)
{
    /* In practice this is mostly used for single use temporary TB
       Ignore the hard cases and just back up if this TB happens to
       be the last one generated.  */
    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
        code_gen_ptr = tb->tc_ptr;
        nb_tbs--;
    }
}

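/* Note: TranslationBlocks are bump-allocated from the tbs[] array and
   their generated code from code_gen_buffer; neither is freed
   individually.  tb_free() above can only reclaim the most recently
   allocated TB, everything else is recycled wholesale by tb_flush(). */
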
static inline void invalidate_page_bitmap(PageDesc *p)
{
    if (p->code_bitmap) {
        g_free(p->code_bitmap);
        p->code_bitmap = NULL;
    }
    p->code_write_count = 0;
}

/* Set to NULL all the 'first_tb' fields in all PageDescs. */

static void page_flush_tb_1 (int level, void **lp)
{
    int i;

    if (*lp == NULL) {
        return;
    }
    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pd[i].first_tb = NULL;
            invalidate_page_bitmap(pd + i);
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            page_flush_tb_1 (level - 1, pp + i);
        }
    }
}

static void page_flush_tb(void)
{
    int i;
    for (i = 0; i < V_L1_SIZE; i++) {
        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
    }
}

/* flush all the translation blocks */
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUArchState *env1)
{
    CPUArchState *env;
#if defined(DEBUG_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(code_gen_ptr - code_gen_buffer),
           nb_tbs, nb_tbs > 0 ?
           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
#endif
    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
        cpu_abort(env1, "Internal error: code buffer overflow\n");

    nb_tbs = 0;

    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
    }

    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
    page_flush_tb();

    code_gen_ptr = code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    tb_flush_count++;
}

#ifdef DEBUG_TB_CHECK

static void tb_invalidate_check(target_ulong address)
{
    TranslationBlock *tb;
    int i;
    address &= TARGET_PAGE_MASK;
    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                  address >= tb->pc + tb->size)) {
                printf("ERROR invalidate: address=" TARGET_FMT_lx
                       " PC=%08lx size=%04x\n",
                       address, (long)tb->pc, tb->size);
            }
        }
    }
}

/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
    TranslationBlock *tb;
    int i, flags1, flags2;

    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            flags1 = page_get_flags(tb->pc);
            flags2 = page_get_flags(tb->pc + tb->size - 1);
            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
                       (long)tb->pc, tb->size, flags1, flags2);
            }
        }
    }
}

#endif

/* invalidate one TB */
static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
                             int next_offset)
{
    TranslationBlock *tb1;
    for(;;) {
        tb1 = *ptb;
        if (tb1 == tb) {
            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
            break;
        }
        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
    }
}

static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    unsigned int n1;

    for(;;) {
        tb1 = *ptb;
        n1 = (uintptr_t)tb1 & 3;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        if (tb1 == tb) {
            *ptb = tb1->page_next[n1];
            break;
        }
        ptb = &tb1->page_next[n1];
    }
}

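/* Note: the pointers stored in a page's first_tb/page_next chain carry the
   page slot (0 or 1) in their low two bits, which is why the list walker
   above masks with ~3 before dereferencing.  The jump lists below use the
   same encoding, with the value 2 marking the list head (jmp_first). */
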
static inline void tb_jmp_remove(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, **ptb;
    unsigned int n1;

    ptb = &tb->jmp_next[n];
    tb1 = *ptb;
    if (tb1) {
        /* find tb(n) in circular list */
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            if (n1 == 2) {
                ptb = &tb1->jmp_first;
            } else {
                ptb = &tb1->jmp_next[n1];
            }
        }
        /* now we can suppress tb(n) from the list */
        *ptb = tb->jmp_next[n];

        tb->jmp_next[n] = NULL;
    }
}

/* reset the jump entry 'n' of a TB so that it is not chained to
   another TB */
static inline void tb_reset_jump(TranslationBlock *tb, int n)
{
    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
}

void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUArchState *env;
    PageDesc *p;
    unsigned int h, n1;
    tb_page_addr_t phys_pc;
    TranslationBlock *tb1, *tb2;

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_remove(&tb_phys_hash[h], tb,
              offsetof(TranslationBlock, phys_hash_next));

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;

    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->tb_jmp_cache[h] == tb)
            env->tb_jmp_cache[h] = NULL;
    }

    /* suppress this TB from the two jump lists */
    tb_jmp_remove(tb, 0);
    tb_jmp_remove(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb1 = tb->jmp_first;
    for(;;) {
        n1 = (uintptr_t)tb1 & 3;
        if (n1 == 2)
            break;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        tb2 = tb1->jmp_next[n1];
        tb_reset_jump(tb1, n1);
        tb1->jmp_next[n1] = NULL;
        tb1 = tb2;
    }
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
}

static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}

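/* Example: set_bits(tab, 3, 6) marks bits 3..8, i.e. the top five bits of
   tab[0] and the lowest bit of tab[1].  build_page_bitmap() below relies
   on this to record which bytes of a page are covered by translated
   code. */
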
static void build_page_bitmap(PageDesc *p)
{
    int n, tb_start, tb_end;
    TranslationBlock *tb;

    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);

    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->pc & ~TARGET_PAGE_MASK;
            tb_end = tb_start + tb->size;
            if (tb_end > TARGET_PAGE_SIZE)
                tb_end = TARGET_PAGE_SIZE;
        } else {
            tb_start = 0;
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
        tb = tb->page_next[n];
    }
}

TranslationBlock *tb_gen_code(CPUArchState *env,
                              target_ulong pc, target_ulong cs_base,
                              int flags, int cflags)
{
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    int code_gen_size;

    phys_pc = get_page_addr_code(env, pc);
    tb = tb_alloc(pc);
    if (!tb) {
        /* flush must be done */
        tb_flush(env);
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        /* Don't forget to invalidate previous TB info.  */
        tb_invalidated_flag = 1;
    }
    tc_ptr = code_gen_ptr;
    tb->tc_ptr = tc_ptr;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    cpu_gen_code(env, tb, &code_gen_size);
    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    tb_link_page(tb, phys_pc, phys_page2);
    return tb;
}

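/* Note: a TB may cross a guest page boundary.  tb_gen_code() records the
   physical address of the second page (or -1 if there is none) so that
   tb_link_page() can hook the TB into both pages' TB lists and a write to
   either page invalidates it. */
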
/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end may refer to *different* physical pages.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
                              int is_cpu_write_access)
{
    while (start < end) {
        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
        start &= TARGET_PAGE_MASK;
        start += TARGET_PAGE_SIZE;
    }
}

/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end must refer to the *same* physical page.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
{
    TranslationBlock *tb, *tb_next, *saved_tb;
    CPUArchState *env = cpu_single_env;
    tb_page_addr_t tb_start, tb_end;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    int current_tb_not_found = is_cpu_write_access;
    TranslationBlock *current_tb = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
        is_cpu_write_access) {
        /* build code bitmap */
        build_page_bitmap(p);
    }

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all the code */
    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        tb_next = tb->page_next[n];
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = 0;
                current_tb = NULL;
                if (env->mem_io_pc) {
                    /* now we have a real cpu fault */
                    current_tb = tb_find_pc(env->mem_io_pc);
                }
            }
            if (current_tb == tb &&
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                its execution. We could be more precise by checking
                that the modification is after the current PC, but it
                would require a specialized function to partially
                restore the CPU state */

                current_tb_modified = 1;
                cpu_restore_state(current_tb, env, env->mem_io_pc);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            /* we need to do that to handle the case where a signal
               occurs while doing tb_phys_invalidate() */
            saved_tb = NULL;
            if (env) {
                saved_tb = env->current_tb;
                env->current_tb = NULL;
            }
            tb_phys_invalidate(tb, -1);
            if (env) {
                env->current_tb = saved_tb;
                if (env->interrupt_request && env->current_tb)
                    cpu_interrupt(env, env->interrupt_request);
            }
        }
        tb = tb_next;
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        if (is_cpu_write_access) {
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
        }
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, NULL);
    }
#endif
}

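/* Note: once a page's code_write_count passes SMC_BITMAP_USE_THRESHOLD a
   code_bitmap is built for it, so that tb_invalidate_phys_page_fast()
   below can skip writes that do not overlap any translated code instead of
   taking the full invalidation path above. */
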
/* len must be <= 8 and start must be a multiple of len */
static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
    PageDesc *p;
    int offset, b;
#if 0
    if (1) {
        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
                  cpu_single_env->mem_io_vaddr, len,
                  cpu_single_env->eip,
                  cpu_single_env->eip +
                  (intptr_t)cpu_single_env->segs[R_CS].base);
    }
#endif
    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (p->code_bitmap) {
        offset = start & ~TARGET_PAGE_MASK;
        b = p->code_bitmap[offset >> 3] >> (offset & 7);
        if (b & ((1 << len) - 1))
            goto do_invalidate;
    } else {
    do_invalidate:
        tb_invalidate_phys_page_range(start, start + len, 1);
    }
}

#if !defined(CONFIG_SOFTMMU)
static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                    uintptr_t pc, void *puc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUArchState *env = cpu_single_env;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p)
        return;
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
        current_tb = tb_find_pc(pc);
    }
#endif
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                   its execution. We could be more precise by checking
                   that the modification is after the current PC, but it
                   would require a specialized function to partially
                   restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state(current_tb, env, pc);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        tb_phys_invalidate(tb, addr);
        tb = tb->page_next[n];
    }
    p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, puc);
    }
#endif
}
#endif

/* add the tb in the target page and protect it if necessary */
static inline void tb_alloc_page(TranslationBlock *tb,
                                 unsigned int n, tb_page_addr_t page_addr)
{
    PageDesc *p;
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    page_already_protected = p->first_tb != NULL;
#endif
    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
    invalidate_page_bitmap(p);

#if defined(TARGET_HAS_SMC) || 1

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find (addr >> TARGET_PAGE_BITS);
            if (!p2)
                continue;
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
#ifdef DEBUG_TB_INVALIDATE
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
               page_addr);
#endif
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif

#endif /* TARGET_HAS_SMC */
}

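/* Note: protection against self-modifying code differs by mode.  In user
   mode the host page is made read-only with mprotect(), so guest writes
   fault and are handled; in system mode tlb_protect_code() forces writes
   to the page through the slow path until its last TB is invalidated. */
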
/* add a new TB and link it to the physical page tables. phys_page2 is
   (-1) to indicate that only one page contains the TB. */
void tb_link_page(TranslationBlock *tb,
                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
{
    unsigned int h;
    TranslationBlock **ptb;

    /* Grab the mmap lock to stop another thread invalidating this TB
       before we are done.  */
    mmap_lock();
    /* add in the physical hash table */
    h = tb_phys_hash_func(phys_pc);
    ptb = &tb_phys_hash[h];
    tb->phys_hash_next = *ptb;
    *ptb = tb;

    /* add in the page list */
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (phys_page2 != -1)
        tb_alloc_page(tb, 1, phys_page2);
    else
        tb->page_addr[1] = -1;

    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
    tb->jmp_next[0] = NULL;
    tb->jmp_next[1] = NULL;

    /* init original jump addresses */
    if (tb->tb_next_offset[0] != 0xffff)
        tb_reset_jump(tb, 0);
    if (tb->tb_next_offset[1] != 0xffff)
        tb_reset_jump(tb, 1);

#ifdef DEBUG_TB_CHECK
    tb_page_check();
#endif
    mmap_unlock();
}

/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
   tb[1].tc_ptr. Return NULL if not found */
TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
{
    int m_min, m_max, m;
    uintptr_t v;
    TranslationBlock *tb;

    if (nb_tbs <= 0)
        return NULL;
    if (tc_ptr < (uintptr_t)code_gen_buffer ||
        tc_ptr >= (uintptr_t)code_gen_ptr) {
        return NULL;
    }
    /* binary search (cf Knuth) */
    m_min = 0;
    m_max = nb_tbs - 1;
    while (m_min <= m_max) {
        m = (m_min + m_max) >> 1;
        tb = &tbs[m];
        v = (uintptr_t)tb->tc_ptr;
        if (v == tc_ptr)
            return tb;
        else if (tc_ptr < v) {
            m_max = m - 1;
        } else {
            m_min = m + 1;
        }
    }
    return &tbs[m_max];
}

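/* Note: tb_find_pc() relies on tbs[] being filled in allocation order,
   with tc_ptr increasing monotonically as code_gen_ptr advances, so a
   binary search is enough; when there is no exact match it returns the
   last TB whose tc_ptr is below the searched host PC. */
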
static void tb_reset_jump_recursive(TranslationBlock *tb);

static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, *tb_next, **ptb;
    unsigned int n1;

    tb1 = tb->jmp_next[n];
    if (tb1 != NULL) {
        /* find head of list */
        for(;;) {
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == 2)
                break;
            tb1 = tb1->jmp_next[n1];
        }
        /* we are now sure that tb jumps to tb1 */
        tb_next = tb1;

        /* remove tb from the jmp_first list */
        ptb = &tb_next->jmp_first;
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            ptb = &tb1->jmp_next[n1];
        }
        *ptb = tb->jmp_next[n];
        tb->jmp_next[n] = NULL;

        /* suppress the jump to next tb in generated code */
        tb_reset_jump(tb, n);

        /* suppress jumps in the tb on which we could have jumped */
        tb_reset_jump_recursive(tb_next);
    }
}

static void tb_reset_jump_recursive(TranslationBlock *tb)
{
    tb_reset_jump_recursive2(tb, 0);
    tb_reset_jump_recursive2(tb, 1);
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
void tb_invalidate_phys_addr(target_phys_addr_t addr)
{
    ram_addr_t ram_addr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!(memory_region_is_ram(section->mr)
          || (section->mr->rom_device && section->mr->readable))) {
        return;
    }
    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        + memory_region_section_addr(section, addr);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}

static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif

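/* Note: watchpoints are restricted to power-of-two lengths no larger than
   TARGET_PAGE_SIZE, with the address aligned to the length; what gets
   stored is the start address plus the corresponding len_mask. */
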
/* Add a breakpoint.  */
1576
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1577
                          CPUBreakpoint **breakpoint)
1578
{
1579
#if defined(TARGET_HAS_ICE)
1580
    CPUBreakpoint *bp;
1581

    
1582
    bp = g_malloc(sizeof(*bp));
1583

    
1584
    bp->pc = pc;
1585
    bp->flags = flags;
1586

    
1587
    /* keep all GDB-injected breakpoints in front */
1588
    if (flags & BP_GDB)
1589
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1590
    else
1591
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1592

    
1593
    breakpoint_invalidate(env, pc);
1594

    
1595
    if (breakpoint)
1596
        *breakpoint = bp;
1597
    return 0;
1598
#else
1599
    return -ENOSYS;
1600
#endif
1601
}
1602

    
1603
/* Remove a specific breakpoint.  */
1604
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1605
{
1606
#if defined(TARGET_HAS_ICE)
1607
    CPUBreakpoint *bp;
1608

    
1609
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1610
        if (bp->pc == pc && bp->flags == flags) {
1611
            cpu_breakpoint_remove_by_ref(env, bp);
1612
            return 0;
1613
        }
1614
    }
1615
    return -ENOENT;
1616
#else
1617
    return -ENOSYS;
1618
#endif
1619
}
1620

    
1621
/* Remove a specific breakpoint by reference.  */
1622
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1623
{
1624
#if defined(TARGET_HAS_ICE)
1625
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1626

    
1627
    breakpoint_invalidate(env, breakpoint->pc);
1628

    
1629
    g_free(breakpoint);
1630
#endif
1631
}
1632

    
1633
/* Remove all matching breakpoints. */
1634
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1635
{
1636
#if defined(TARGET_HAS_ICE)
1637
    CPUBreakpoint *bp, *next;
1638

    
1639
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1640
        if (bp->flags & mask)
1641
            cpu_breakpoint_remove_by_ref(env, bp);
1642
    }
1643
#endif
1644
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUArchState *env, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (env->singlestep_enabled != enabled) {
        env->singlestep_enabled = enabled;
        if (kvm_enabled())
            kvm_update_guest_debug(env, 0);
        else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(env);
        }
    }
#endif
}

static void cpu_unlink_tb(CPUArchState *env)
{
    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
       problem and hope the cpu will stop of its own accord.  For userspace
       emulation this often isn't actually as bad as it sounds.  Often
       signals are used primarily to interrupt blocking syscalls.  */
    TranslationBlock *tb;
    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;

    spin_lock(&interrupt_lock);
    tb = env->current_tb;
    /* if the cpu is currently executing code, we must unlink it and
       all the potentially executing TB */
    if (tb) {
        env->current_tb = NULL;
        tb_reset_jump_recursive(tb);
    }
    spin_unlock(&interrupt_lock);
}

#ifndef CONFIG_USER_ONLY
/* mask must never be zero, except for A20 change call */
static void tcg_handle_interrupt(CPUArchState *env, int mask)
{
    int old_mask;

    old_mask = env->interrupt_request;
    env->interrupt_request |= mask;

    /*
     * If called from iothread context, wake the target cpu in
     * case it's halted.
     */
    if (!qemu_cpu_is_self(env)) {
        qemu_cpu_kick(env);
        return;
    }

    if (use_icount) {
        env->icount_decr.u16.high = 0xffff;
        if (!can_do_io(env)
            && (mask & ~old_mask) != 0) {
            cpu_abort(env, "Raised interrupt while not in I/O function");
        }
    } else {
        cpu_unlink_tb(env);
    }
}

CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request |= mask;
    cpu_unlink_tb(env);
}
#endif /* CONFIG_USER_ONLY */

void cpu_reset_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request &= ~mask;
}

void cpu_exit(CPUArchState *env)
{
    env->exit_request = 1;
    cpu_unlink_tb(env);
}

void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}
1766

    
1767
CPUArchState *cpu_copy(CPUArchState *env)
1768
{
1769
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
1770
    CPUArchState *next_cpu = new_env->next_cpu;
1771
    int cpu_index = new_env->cpu_index;
1772
#if defined(TARGET_HAS_ICE)
1773
    CPUBreakpoint *bp;
1774
    CPUWatchpoint *wp;
1775
#endif
1776

    
1777
    memcpy(new_env, env, sizeof(CPUArchState));
1778

    
1779
    /* Preserve chaining and index. */
1780
    new_env->next_cpu = next_cpu;
1781
    new_env->cpu_index = cpu_index;
1782

    
1783
    /* Clone all break/watchpoints.
1784
       Note: Once we support ptrace with hw-debug register access, make sure
1785
       BP_CPU break/watchpoints are handled correctly on clone. */
1786
    QTAILQ_INIT(&env->breakpoints);
1787
    QTAILQ_INIT(&env->watchpoints);
1788
#if defined(TARGET_HAS_ICE)
1789
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1790
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1791
    }
1792
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1793
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1794
                              wp->flags, NULL);
1795
    }
1796
#endif
1797

    
1798
    return new_env;
1799
}
1800

    
1801
#if !defined(CONFIG_USER_ONLY)
1802
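/* Flush the per-CPU jump cache entries that may refer to TBs on the page
   containing 'addr' (and on the preceding page, since a TB may span two
   pages). */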
void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1803
{
1804
    unsigned int i;
1805

    
1806
    /* Discard jump cache entries for any tb which might potentially
1807
       overlap the flushed page.  */
1808
    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1809
    memset (&env->tb_jmp_cache[i], 0, 
1810
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1811

    
1812
    i = tb_jmp_cache_hash_page(addr);
1813
    memset (&env->tb_jmp_cache[i], 0, 
1814
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1815
}
1816

    
1817
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1818
                                      uintptr_t length)
1819
{
1820
    uintptr_t start1;
1821

    
1822
    /* we modify the TLB cache so that the dirty bit will be set again
1823
       when accessing the range */
1824
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1825
    /* Check that we don't span multiple blocks - this breaks the
1826
       address comparisons below.  */
1827
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1828
            != (end - 1) - start) {
1829
        abort();
1830
    }
1831
    cpu_tlb_reset_dirty_all(start1, length);
1832

    
1833
}
1834

    
1835
/* Note: start and end must be within the same ram block.  */
1836
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1837
                                     int dirty_flags)
1838
{
1839
    uintptr_t length;
1840

    
1841
    start &= TARGET_PAGE_MASK;
1842
    end = TARGET_PAGE_ALIGN(end);
1843

    
1844
    length = end - start;
1845
    if (length == 0)
1846
        return;
1847
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1848

    
1849
    if (tcg_enabled()) {
1850
        tlb_reset_dirty_range_all(start, end, length);
1851
    }
1852
}
1853

    
1854
int cpu_physical_memory_set_dirty_tracking(int enable)
1855
{
1856
    int ret = 0;
1857
    in_migration = enable;
1858
    return ret;
1859
}
1860

    
1861
target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1862
                                                   MemoryRegionSection *section,
1863
                                                   target_ulong vaddr,
1864
                                                   target_phys_addr_t paddr,
1865
                                                   int prot,
1866
                                                   target_ulong *address)
1867
{
1868
    target_phys_addr_t iotlb;
1869
    CPUWatchpoint *wp;
1870

    
1871
    if (memory_region_is_ram(section->mr)) {
1872
        /* Normal RAM.  */
1873
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1874
            + memory_region_section_addr(section, paddr);
1875
        if (!section->readonly) {
1876
            iotlb |= phys_section_notdirty;
1877
        } else {
1878
            iotlb |= phys_section_rom;
1879
        }
1880
    } else {
1881
        /* IO handlers are currently passed a physical address.
1882
           It would be nice to pass an offset from the base address
1883
           of that region.  This would avoid having to special case RAM,
1884
           and avoid full address decoding in every device.
1885
           We can't use the high bits of pd for this because
1886
           IO_MEM_ROMD uses these as a ram address.  */
1887
        iotlb = section - phys_sections;
1888
        iotlb += memory_region_section_addr(section, paddr);
1889
    }
1890

    
1891
    /* Make accesses to pages with watchpoints go via the
1892
       watchpoint trap routines.  */
1893
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1894
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1895
            /* Avoid trapping reads of pages with a write breakpoint. */
1896
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1897
                iotlb = phys_section_watch + paddr;
1898
                *address |= TLB_MMIO;
1899
                break;
1900
            }
1901
        }
1902
    }
1903

    
1904
    return iotlb;
1905
}
1906

    
1907
#else
1908
/*
1909
 * Walks guest process memory "regions" one by one
1910
 * and calls callback function 'fn' for each region.
1911
 */
1912

    
1913
struct walk_memory_regions_data
1914
{
1915
    walk_memory_regions_fn fn;
1916
    void *priv;
1917
    uintptr_t start;
1918
    int prot;
1919
};
1920

    
1921
static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1922
                                   abi_ulong end, int new_prot)
1923
{
1924
    if (data->start != -1ul) {
1925
        int rc = data->fn(data->priv, data->start, end, data->prot);
1926
        if (rc != 0) {
1927
            return rc;
1928
        }
1929
    }
1930

    
1931
    data->start = (new_prot ? end : -1ul);
1932
    data->prot = new_prot;
1933

    
1934
    return 0;
1935
}
1936

    
1937
static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1938
                                 abi_ulong base, int level, void **lp)
1939
{
1940
    abi_ulong pa;
1941
    int i, rc;
1942

    
1943
    if (*lp == NULL) {
1944
        return walk_memory_regions_end(data, base, 0);
1945
    }
1946

    
1947
    if (level == 0) {
1948
        PageDesc *pd = *lp;
1949
        for (i = 0; i < L2_SIZE; ++i) {
1950
            int prot = pd[i].flags;
1951

    
1952
            pa = base | (i << TARGET_PAGE_BITS);
1953
            if (prot != data->prot) {
1954
                rc = walk_memory_regions_end(data, pa, prot);
1955
                if (rc != 0) {
1956
                    return rc;
1957
                }
1958
            }
1959
        }
1960
    } else {
1961
        void **pp = *lp;
1962
        for (i = 0; i < L2_SIZE; ++i) {
1963
            pa = base | ((abi_ulong)i <<
1964
                (TARGET_PAGE_BITS + L2_BITS * level));
1965
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1966
            if (rc != 0) {
1967
                return rc;
1968
            }
1969
        }
1970
    }
1971

    
1972
    return 0;
1973
}
1974

    
1975
int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1976
{
1977
    struct walk_memory_regions_data data;
1978
    uintptr_t i;
1979

    
1980
    data.fn = fn;
1981
    data.priv = priv;
1982
    data.start = -1ul;
1983
    data.prot = 0;
1984

    
1985
    for (i = 0; i < V_L1_SIZE; i++) {
1986
        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1987
                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1988
        if (rc != 0) {
1989
            return rc;
1990
        }
1991
    }
1992

    
1993
    return walk_memory_regions_end(&data, 0, 0);
1994
}
1995

    
1996
static int dump_region(void *priv, abi_ulong start,
1997
    abi_ulong end, unsigned long prot)
1998
{
1999
    FILE *f = (FILE *)priv;
2000

    
2001
    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2002
        " "TARGET_ABI_FMT_lx" %c%c%c\n",
2003
        start, end, end - start,
2004
        ((prot & PAGE_READ) ? 'r' : '-'),
2005
        ((prot & PAGE_WRITE) ? 'w' : '-'),
2006
        ((prot & PAGE_EXEC) ? 'x' : '-'));
2007

    
2008
    return (0);
2009
}
2010

    
2011
/* dump memory mappings */
2012
void page_dump(FILE *f)
2013
{
2014
    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2015
            "start", "end", "size", "prot");
2016
    walk_memory_regions(f, dump_region);
2017
}
2018

    
2019
int page_get_flags(target_ulong address)
2020
{
2021
    PageDesc *p;
2022

    
2023
    p = page_find(address >> TARGET_PAGE_BITS);
2024
    if (!p)
2025
        return 0;
2026
    return p->flags;
2027
}
2028

    
2029
/* Modify the flags of a page and invalidate the code if necessary.
2030
   The flag PAGE_WRITE_ORG is positioned automatically depending
2031
   on PAGE_WRITE.  The mmap_lock should already be held.  */
2032
void page_set_flags(target_ulong start, target_ulong end, int flags)
2033
{
2034
    target_ulong addr, len;
2035

    
2036
    /* This function should never be called with addresses outside the
2037
       guest address space.  If this assert fires, it probably indicates
2038
       a missing call to h2g_valid.  */
2039
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2040
    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2041
#endif
2042
    assert(start < end);
2043

    
2044
    start = start & TARGET_PAGE_MASK;
2045
    end = TARGET_PAGE_ALIGN(end);
2046

    
2047
    if (flags & PAGE_WRITE) {
2048
        flags |= PAGE_WRITE_ORG;
2049
    }
2050

    
2051
    for (addr = start, len = end - start;
2052
         len != 0;
2053
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2054
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2055

    
2056
        /* If the write protection bit is set, then we invalidate
2057
           the code inside.  */
2058
        if (!(p->flags & PAGE_WRITE) &&
2059
            (flags & PAGE_WRITE) &&
2060
            p->first_tb) {
2061
            tb_invalidate_phys_page(addr, 0, NULL);
2062
        }
2063
        p->flags = flags;
2064
    }
2065
}
2066

    
2067
int page_check_range(target_ulong start, target_ulong len, int flags)
{
    PageDesc *p;
    target_ulong end;
    target_ulong addr;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif

    if (len == 0) {
        return 0;
    }
    if (start + len - 1 < start) {
        /* We've wrapped around.  */
        return -1;
    }

    end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
    start = start & TARGET_PAGE_MASK;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        p = page_find(addr >> TARGET_PAGE_BITS);
        if (!p)
            return -1;
        if (!(p->flags & PAGE_VALID))
            return -1;

        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
            return -1;
        if (flags & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG))
                return -1;
            /* unprotect the page if it was put read-only because it
               contains translated code */
            if (!(p->flags & PAGE_WRITE)) {
                if (!page_unprotect(addr, 0, NULL))
                    return -1;
            }
            return 0;
        }
    }
    return 0;
}
2116

    
2117
/* called from signal handler: invalidate the code and unprotect the
2118
   page. Return TRUE if the fault was successfully handled. */
2119
int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2120
{
2121
    unsigned int prot;
2122
    PageDesc *p;
2123
    target_ulong host_start, host_end, addr;
2124

    
2125
    /* Technically this isn't safe inside a signal handler.  However we
2126
       know this only ever happens in a synchronous SEGV handler, so in
2127
       practice it seems to be ok.  */
2128
    mmap_lock();
2129

    
2130
    p = page_find(address >> TARGET_PAGE_BITS);
2131
    if (!p) {
2132
        mmap_unlock();
2133
        return 0;
2134
    }
2135

    
2136
    /* if the page was really writable, then we change its
2137
       protection back to writable */
2138
    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2139
        host_start = address & qemu_host_page_mask;
2140
        host_end = host_start + qemu_host_page_size;
2141

    
2142
        prot = 0;
2143
        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2144
            p = page_find(addr >> TARGET_PAGE_BITS);
2145
            p->flags |= PAGE_WRITE;
2146
            prot |= p->flags;
2147

    
2148
            /* and since the content will be modified, we must invalidate
2149
               the corresponding translated code. */
2150
            tb_invalidate_phys_page(addr, pc, puc);
2151
#ifdef DEBUG_TB_CHECK
2152
            tb_invalidate_check(addr);
2153
#endif
2154
        }
2155
        mprotect((void *)g2h(host_start), qemu_host_page_size,
2156
                 prot & PAGE_BITS);
2157

    
2158
        mmap_unlock();
2159
        return 1;
2160
    }
2161
    mmap_unlock();
2162
    return 0;
2163
}
2164
#endif /* defined(CONFIG_USER_ONLY) */
2165

    
2166
#if !defined(CONFIG_USER_ONLY)
2167

    
2168
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2169
typedef struct subpage_t {
2170
    MemoryRegion iomem;
2171
    target_phys_addr_t base;
2172
    uint16_t sub_section[TARGET_PAGE_SIZE];
2173
} subpage_t;
2174

    
2175
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2176
                             uint16_t section);
2177
static subpage_t *subpage_init(target_phys_addr_t base);
2178
static void destroy_page_desc(uint16_t section_index)
2179
{
2180
    MemoryRegionSection *section = &phys_sections[section_index];
2181
    MemoryRegion *mr = section->mr;
2182

    
2183
    if (mr->subpage) {
2184
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
2185
        memory_region_destroy(&subpage->iomem);
2186
        g_free(subpage);
2187
    }
2188
}
2189

    
2190
static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2191
{
2192
    unsigned i;
2193
    PhysPageEntry *p;
2194

    
2195
    if (lp->ptr == PHYS_MAP_NODE_NIL) {
2196
        return;
2197
    }
2198

    
2199
    p = phys_map_nodes[lp->ptr];
2200
    for (i = 0; i < L2_SIZE; ++i) {
2201
        if (!p[i].is_leaf) {
2202
            destroy_l2_mapping(&p[i], level - 1);
2203
        } else {
2204
            destroy_page_desc(p[i].ptr);
2205
        }
2206
    }
2207
    lp->is_leaf = 0;
2208
    lp->ptr = PHYS_MAP_NODE_NIL;
2209
}
2210

    
2211
static void destroy_all_mappings(void)
2212
{
2213
    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2214
    phys_map_nodes_reset();
2215
}
2216

    
2217
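/* Append a copy of *section to the phys_sections table, growing the table
   as needed, and return its index. */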
static uint16_t phys_section_add(MemoryRegionSection *section)
2218
{
2219
    if (phys_sections_nb == phys_sections_nb_alloc) {
2220
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2221
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
2222
                                phys_sections_nb_alloc);
2223
    }
2224
    phys_sections[phys_sections_nb] = *section;
2225
    return phys_sections_nb++;
2226
}
2227

    
2228
static void phys_sections_clear(void)
2229
{
2230
    phys_sections_nb = 0;
2231
}
2232

    
2233
static void register_subpage(MemoryRegionSection *section)
2234
{
2235
    subpage_t *subpage;
2236
    target_phys_addr_t base = section->offset_within_address_space
2237
        & TARGET_PAGE_MASK;
2238
    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2239
    MemoryRegionSection subsection = {
2240
        .offset_within_address_space = base,
2241
        .size = TARGET_PAGE_SIZE,
2242
    };
2243
    target_phys_addr_t start, end;
2244

    
2245
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2246

    
2247
    if (!(existing->mr->subpage)) {
2248
        subpage = subpage_init(base);
2249
        subsection.mr = &subpage->iomem;
2250
        phys_page_set(base >> TARGET_PAGE_BITS, 1,
2251
                      phys_section_add(&subsection));
2252
    } else {
2253
        subpage = container_of(existing->mr, subpage_t, iomem);
2254
    }
2255
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2256
    end = start + section->size - 1;
2257
    subpage_register(subpage, start, end, phys_section_add(section));
2258
}
2259

    
2260

    
2261
static void register_multipage(MemoryRegionSection *section)
2262
{
2263
    target_phys_addr_t start_addr = section->offset_within_address_space;
2264
    ram_addr_t size = section->size;
2265
    target_phys_addr_t addr;
2266
    uint16_t section_index = phys_section_add(section);
2267

    
2268
    assert(size);
2269

    
2270
    addr = start_addr;
2271
    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2272
                  section_index);
2273
}
2274

    
2275
void cpu_register_physical_memory_log(MemoryRegionSection *section,
2276
                                      bool readonly)
2277
{
2278
    MemoryRegionSection now = *section, remain = *section;
2279

    
2280
    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2281
        || (now.size < TARGET_PAGE_SIZE)) {
2282
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2283
                       - now.offset_within_address_space,
2284
                       now.size);
2285
        register_subpage(&now);
2286
        remain.size -= now.size;
2287
        remain.offset_within_address_space += now.size;
2288
        remain.offset_within_region += now.size;
2289
    }
2290
    while (remain.size >= TARGET_PAGE_SIZE) {
2291
        now = remain;
2292
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2293
            now.size = TARGET_PAGE_SIZE;
2294
            register_subpage(&now);
2295
        } else {
2296
            now.size &= TARGET_PAGE_MASK;
2297
            register_multipage(&now);
2298
        }
2299
        remain.size -= now.size;
2300
        remain.offset_within_address_space += now.size;
2301
        remain.offset_within_region += now.size;
2302
    }
2303
    now = remain;
2304
    if (now.size) {
2305
        register_subpage(&now);
2306
    }
2307
}
2308

    
2309

    
2310
void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2311
{
2312
    if (kvm_enabled())
2313
        kvm_coalesce_mmio_region(addr, size);
2314
}
2315

    
2316
void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2317
{
2318
    if (kvm_enabled())
2319
        kvm_uncoalesce_mmio_region(addr, size);
2320
}
2321

    
2322
void qemu_flush_coalesced_mmio_buffer(void)
2323
{
2324
    if (kvm_enabled())
2325
        kvm_flush_coalesced_mmio_buffer();
2326
}
2327

    
2328
#if defined(__linux__) && !defined(TARGET_S390X)
2329

    
2330
#include <sys/vfs.h>
2331

    
2332
#define HUGETLBFS_MAGIC       0x958458f6
2333

    
2334
static long gethugepagesize(const char *path)
2335
{
2336
    struct statfs fs;
2337
    int ret;
2338

    
2339
    do {
2340
        ret = statfs(path, &fs);
2341
    } while (ret != 0 && errno == EINTR);
2342

    
2343
    if (ret != 0) {
2344
        perror(path);
2345
        return 0;
2346
    }
2347

    
2348
    if (fs.f_type != HUGETLBFS_MAGIC)
2349
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2350

    
2351
    return fs.f_bsize;
2352
}
2353

    
2354
static void *file_ram_alloc(RAMBlock *block,
2355
                            ram_addr_t memory,
2356
                            const char *path)
2357
{
2358
    char *filename;
2359
    void *area;
2360
    int fd;
2361
#ifdef MAP_POPULATE
2362
    int flags;
2363
#endif
2364
    unsigned long hpagesize;
2365

    
2366
    hpagesize = gethugepagesize(path);
2367
    if (!hpagesize) {
2368
        return NULL;
2369
    }
2370

    
2371
    if (memory < hpagesize) {
2372
        return NULL;
2373
    }
2374

    
2375
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
2376
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2377
        return NULL;
2378
    }
2379

    
2380
    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2381
        return NULL;
2382
    }
2383

    
2384
    fd = mkstemp(filename);
2385
    if (fd < 0) {
2386
        perror("unable to create backing store for hugepages");
2387
        free(filename);
2388
        return NULL;
2389
    }
2390
    unlink(filename);
2391
    free(filename);
2392

    
2393
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
2394

    
2395
    /*
2396
     * ftruncate is not supported by hugetlbfs in older
2397
     * hosts, so don't bother bailing out on errors.
2398
     * If anything goes wrong with it under other filesystems,
2399
     * mmap will fail.
2400
     */
2401
    if (ftruncate(fd, memory))
2402
        perror("ftruncate");
2403

    
2404
#ifdef MAP_POPULATE
2405
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2406
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
2407
     * to sidestep this quirk.
2408
     */
2409
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2410
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2411
#else
2412
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2413
#endif
2414
    if (area == MAP_FAILED) {
2415
        perror("file_ram_alloc: can't mmap RAM pages");
2416
        close(fd);
2417
        return (NULL);
2418
    }
2419
    block->fd = fd;
2420
    return area;
2421
}
2422
#endif
2423

    
2424
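/* Find the smallest free gap in the RAM block list that can hold 'size'
   bytes (best fit) and return its start offset. */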
static ram_addr_t find_ram_offset(ram_addr_t size)
2425
{
2426
    RAMBlock *block, *next_block;
2427
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2428

    
2429
    if (QLIST_EMPTY(&ram_list.blocks))
2430
        return 0;
2431

    
2432
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2433
        ram_addr_t end, next = RAM_ADDR_MAX;
2434

    
2435
        end = block->offset + block->length;
2436

    
2437
        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2438
            if (next_block->offset >= end) {
2439
                next = MIN(next, next_block->offset);
2440
            }
2441
        }
2442
        if (next - end >= size && next - end < mingap) {
2443
            offset = end;
2444
            mingap = next - end;
2445
        }
2446
    }
2447

    
2448
    if (offset == RAM_ADDR_MAX) {
2449
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2450
                (uint64_t)size);
2451
        abort();
2452
    }
2453

    
2454
    return offset;
2455
}
2456

    
2457
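/* Return the end of the highest-placed RAM block, i.e. the first offset
   past all currently allocated RAM. */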
static ram_addr_t last_ram_offset(void)
2458
{
2459
    RAMBlock *block;
2460
    ram_addr_t last = 0;
2461

    
2462
    QLIST_FOREACH(block, &ram_list.blocks, next)
2463
        last = MAX(last, block->offset + block->length);
2464

    
2465
    return last;
2466
}
2467

    
2468
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2469
{
2470
    int ret;
2471
    QemuOpts *machine_opts;
2472

    
2473
    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2474
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2475
    if (machine_opts &&
2476
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2477
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2478
        if (ret) {
2479
            perror("qemu_madvise");
2480
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2481
                            "but dump_guest_core=off specified\n");
2482
        }
2483
    }
2484
}
2485

    
2486
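/* Set the identification string ("<device path>/<name>") for the RAM block
   starting at 'addr'; aborts if the resulting ID is already in use. */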
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2487
{
2488
    RAMBlock *new_block, *block;
2489

    
2490
    new_block = NULL;
2491
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2492
        if (block->offset == addr) {
2493
            new_block = block;
2494
            break;
2495
        }
2496
    }
2497
    assert(new_block);
2498
    assert(!new_block->idstr[0]);
2499

    
2500
    if (dev) {
2501
        char *id = qdev_get_dev_path(dev);
2502
        if (id) {
2503
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2504
            g_free(id);
2505
        }
2506
    }
2507
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2508

    
2509
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2510
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2511
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2512
                    new_block->idstr);
2513
            abort();
2514
        }
2515
    }
2516
}
2517

    
2518
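/* Advise the host that this memory range may be merged with identical
   pages (e.g. via KSM), unless disabled with -machine mem-merge=off. */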
static int memory_try_enable_merging(void *addr, size_t len)
2519
{
2520
    QemuOpts *opts;
2521

    
2522
    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2523
    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2524
        /* disabled by the user */
2525
        return 0;
2526
    }
2527

    
2528
    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2529
}
2530

    
2531
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2532
                                   MemoryRegion *mr)
2533
{
2534
    RAMBlock *new_block;
2535

    
2536
    size = TARGET_PAGE_ALIGN(size);
2537
    new_block = g_malloc0(sizeof(*new_block));
2538

    
2539
    new_block->mr = mr;
2540
    new_block->offset = find_ram_offset(size);
2541
    if (host) {
2542
        new_block->host = host;
2543
        new_block->flags |= RAM_PREALLOC_MASK;
2544
    } else {
2545
        if (mem_path) {
2546
#if defined (__linux__) && !defined(TARGET_S390X)
2547
            new_block->host = file_ram_alloc(new_block, size, mem_path);
2548
            if (!new_block->host) {
2549
                new_block->host = qemu_vmalloc(size);
2550
                memory_try_enable_merging(new_block->host, size);
2551
            }
2552
#else
2553
            fprintf(stderr, "-mem-path option unsupported\n");
2554
            exit(1);
2555
#endif
2556
        } else {
2557
            if (xen_enabled()) {
2558
                xen_ram_alloc(new_block->offset, size, mr);
2559
            } else if (kvm_enabled()) {
2560
                /* some s390/kvm configurations have special constraints */
2561
                new_block->host = kvm_vmalloc(size);
2562
            } else {
2563
                new_block->host = qemu_vmalloc(size);
2564
            }
2565
            memory_try_enable_merging(new_block->host, size);
2566
        }
2567
    }
2568
    new_block->length = size;
2569

    
2570
    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2571

    
2572
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2573
                                       last_ram_offset() >> TARGET_PAGE_BITS);
2574
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2575
           0, size >> TARGET_PAGE_BITS);
2576
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2577

    
2578
    qemu_ram_setup_dump(new_block->host, size);
2579

    
2580
    if (kvm_enabled())
2581
        kvm_setup_guest_memory(new_block->host, size);
2582

    
2583
    return new_block->offset;
2584
}
2585

    
2586
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2587
{
2588
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
2589
}
2590

    
2591
void qemu_ram_free_from_ptr(ram_addr_t addr)
2592
{
2593
    RAMBlock *block;
2594

    
2595
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2596
        if (addr == block->offset) {
2597
            QLIST_REMOVE(block, next);
2598
            g_free(block);
2599
            return;
2600
        }
2601
    }
2602
}
2603

    
2604
void qemu_ram_free(ram_addr_t addr)
2605
{
2606
    RAMBlock *block;
2607

    
2608
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2609
        if (addr == block->offset) {
2610
            QLIST_REMOVE(block, next);
2611
            if (block->flags & RAM_PREALLOC_MASK) {
2612
                ;
2613
            } else if (mem_path) {
2614
#if defined (__linux__) && !defined(TARGET_S390X)
2615
                if (block->fd) {
2616
                    munmap(block->host, block->length);
2617
                    close(block->fd);
2618
                } else {
2619
                    qemu_vfree(block->host);
2620
                }
2621
#else
2622
                abort();
2623
#endif
2624
            } else {
2625
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2626
                munmap(block->host, block->length);
2627
#else
2628
                if (xen_enabled()) {
2629
                    xen_invalidate_map_cache_entry(block->host);
2630
                } else {
2631
                    qemu_vfree(block->host);
2632
                }
2633
#endif
2634
            }
2635
            g_free(block);
2636
            return;
2637
        }
2638
    }
2639

    
2640
}
2641

    
2642
#ifndef _WIN32
2643
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2644
{
2645
    RAMBlock *block;
2646
    ram_addr_t offset;
2647
    int flags;
2648
    void *area, *vaddr;
2649

    
2650
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2651
        offset = addr - block->offset;
2652
        if (offset < block->length) {
2653
            vaddr = block->host + offset;
2654
            if (block->flags & RAM_PREALLOC_MASK) {
2655
                ;
2656
            } else {
2657
                flags = MAP_FIXED;
2658
                munmap(vaddr, length);
2659
                if (mem_path) {
2660
#if defined(__linux__) && !defined(TARGET_S390X)
2661
                    if (block->fd) {
2662
#ifdef MAP_POPULATE
2663
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2664
                            MAP_PRIVATE;
2665
#else
2666
                        flags |= MAP_PRIVATE;
2667
#endif
2668
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2669
                                    flags, block->fd, offset);
2670
                    } else {
2671
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2672
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2673
                                    flags, -1, 0);
2674
                    }
2675
#else
2676
                    abort();
2677
#endif
2678
                } else {
2679
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2680
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
2681
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2682
                                flags, -1, 0);
2683
#else
2684
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2685
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2686
                                flags, -1, 0);
2687
#endif
2688
                }
2689
                if (area != vaddr) {
2690
                    fprintf(stderr, "Could not remap addr: "
2691
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2692
                            length, addr);
2693
                    exit(1);
2694
                }
2695
                memory_try_enable_merging(vaddr, length);
2696
                qemu_ram_setup_dump(vaddr, length);
2697
            }
2698
            return;
2699
        }
2700
    }
2701
}
2702
#endif /* !_WIN32 */
2703

    
2704
/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            /* Move this entry to the start of the list.  */
            if (block != QLIST_FIRST(&ram_list.blocks)) {
                QLIST_REMOVE(block, next);
                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
            }
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}
2744

    
2745
/* Return a host pointer to ram allocated with qemu_ram_alloc.
2746
 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2747
 */
2748
void *qemu_safe_ram_ptr(ram_addr_t addr)
2749
{
2750
    RAMBlock *block;
2751

    
2752
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2753
        if (addr - block->offset < block->length) {
2754
            if (xen_enabled()) {
2755
                /* We need to check if the requested address is in the RAM
2756
                 * because we don't want to map the entire memory in QEMU.
2757
                 * In that case just map until the end of the page.
2758
                 */
2759
                if (block->offset == 0) {
2760
                    return xen_map_cache(addr, 0, 0);
2761
                } else if (block->host == NULL) {
2762
                    block->host =
2763
                        xen_map_cache(block->offset, block->length, 1);
2764
                }
2765
            }
2766
            return block->host + (addr - block->offset);
2767
        }
2768
    }
2769

    
2770
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2771
    abort();
2772

    
2773
    return NULL;
2774
}
2775

    
2776
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2777
 * but takes a size argument */
2778
void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2779
{
2780
    if (*size == 0) {
2781
        return NULL;
2782
    }
2783
    if (xen_enabled()) {
2784
        return xen_map_cache(addr, *size, 1);
2785
    } else {
2786
        RAMBlock *block;
2787

    
2788
        QLIST_FOREACH(block, &ram_list.blocks, next) {
2789
            if (addr - block->offset < block->length) {
2790
                if (addr - block->offset + *size > block->length)
2791
                    *size = block->length - addr + block->offset;
2792
                return block->host + (addr - block->offset);
2793
            }
2794
        }
2795

    
2796
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2797
        abort();
2798
    }
2799
}
2800

    
2801
void qemu_put_ram_ptr(void *addr)
2802
{
2803
    trace_qemu_put_ram_ptr(addr);
2804
}
2805

    
2806
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return 0;
    }

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            *ram_addr = block->offset + (host - block->host);
            return 0;
        }
    }

    return -1;
}
2829

    
2830
/* Some of the softmmu routines need to translate from a host pointer
2831
   (typically a TLB entry) back to a ram offset.  */
2832
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2833
{
2834
    ram_addr_t ram_addr;
2835

    
2836
    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2837
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
2838
        abort();
2839
    }
2840
    return ram_addr;
2841
}
2842

    
2843
static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2844
                                    unsigned size)
2845
{
2846
#ifdef DEBUG_UNASSIGNED
2847
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2848
#endif
2849
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2850
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2851
#endif
2852
    return 0;
2853
}
2854

    
2855
static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2856
                                 uint64_t val, unsigned size)
2857
{
2858
#ifdef DEBUG_UNASSIGNED
2859
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2860
#endif
2861
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2862
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2863
#endif
2864
}
2865

    
2866
static const MemoryRegionOps unassigned_mem_ops = {
2867
    .read = unassigned_mem_read,
2868
    .write = unassigned_mem_write,
2869
    .endianness = DEVICE_NATIVE_ENDIAN,
2870
};
2871

    
2872
static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2873
                               unsigned size)
2874
{
2875
    abort();
2876
}
2877

    
2878
static void error_mem_write(void *opaque, target_phys_addr_t addr,
2879
                            uint64_t value, unsigned size)
2880
{
2881
    abort();
2882
}
2883

    
2884
static const MemoryRegionOps error_mem_ops = {
2885
    .read = error_mem_read,
2886
    .write = error_mem_write,
2887
    .endianness = DEVICE_NATIVE_ENDIAN,
2888
};
2889

    
2890
static const MemoryRegionOps rom_mem_ops = {
2891
    .read = error_mem_read,
2892
    .write = unassigned_mem_write,
2893
    .endianness = DEVICE_NATIVE_ENDIAN,
2894
};
2895

    
2896
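/* Write handler for pages whose dirty state is being tracked: invalidate
   any translated code derived from the page, perform the write, then
   update the dirty flags. */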
static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2897
                               uint64_t val, unsigned size)
2898
{
2899
    int dirty_flags;
2900
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2901
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2902
#if !defined(CONFIG_USER_ONLY)
2903
        tb_invalidate_phys_page_fast(ram_addr, size);
2904
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2905
#endif
2906
    }
2907
    switch (size) {
2908
    case 1:
2909
        stb_p(qemu_get_ram_ptr(ram_addr), val);
2910
        break;
2911
    case 2:
2912
        stw_p(qemu_get_ram_ptr(ram_addr), val);
2913
        break;
2914
    case 4:
2915
        stl_p(qemu_get_ram_ptr(ram_addr), val);
2916
        break;
2917
    default:
2918
        abort();
2919
    }
2920
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2921
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2922
    /* we remove the notdirty callback only if the code has been
2923
       flushed */
2924
    if (dirty_flags == 0xff)
2925
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2926
}
2927

    
2928
static const MemoryRegionOps notdirty_mem_ops = {
2929
    .read = error_mem_read,
2930
    .write = notdirty_mem_write,
2931
    .endianness = DEVICE_NATIVE_ENDIAN,
2932
};
2933

    
2934
/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUArchState *env = cpu_single_env;
    target_ulong pc, cs_base;
    TranslationBlock *tb;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb = tb_find_pc(env->mem_io_pc);
                if (!tb) {
                    cpu_abort(env, "check_watchpoint: could not find TB for "
                              "pc=%p", (void *)env->mem_io_pc);
                }
                cpu_restore_state(tb, env, env->mem_io_pc);
                tb_phys_invalidate(tb, -1);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
2979

    
2980
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2981
   so these check for a hit then pass through to the normal out-of-line
2982
   phys routines.  */
2983
static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
2984
                               unsigned size)
2985
{
2986
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2987
    switch (size) {
2988
    case 1: return ldub_phys(addr);
2989
    case 2: return lduw_phys(addr);
2990
    case 4: return ldl_phys(addr);
2991
    default: abort();
2992
    }
2993
}
2994

    
2995
static void watch_mem_write(void *opaque, target_phys_addr_t addr,
2996
                            uint64_t val, unsigned size)
2997
{
2998
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2999
    switch (size) {
3000
    case 1:
3001
        stb_phys(addr, val);
3002
        break;
3003
    case 2:
3004
        stw_phys(addr, val);
3005
        break;
3006
    case 4:
3007
        stl_phys(addr, val);
3008
        break;
3009
    default: abort();
3010
    }
3011
}
3012

    
3013
static const MemoryRegionOps watch_mem_ops = {
3014
    .read = watch_mem_read,
3015
    .write = watch_mem_write,
3016
    .endianness = DEVICE_NATIVE_ENDIAN,
3017
};
3018

    
3019
static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3020
                             unsigned len)
3021
{
3022
    subpage_t *mmio = opaque;
3023
    unsigned int idx = SUBPAGE_IDX(addr);
3024
    MemoryRegionSection *section;
3025
#if defined(DEBUG_SUBPAGE)
3026
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3027
           mmio, len, addr, idx);
3028
#endif
3029

    
3030
    section = &phys_sections[mmio->sub_section[idx]];
3031
    addr += mmio->base;
3032
    addr -= section->offset_within_address_space;
3033
    addr += section->offset_within_region;
3034
    return io_mem_read(section->mr, addr, len);
3035
}
3036

    
3037
static void subpage_write(void *opaque, target_phys_addr_t addr,
3038
                          uint64_t value, unsigned len)
3039
{
3040
    subpage_t *mmio = opaque;
3041
    unsigned int idx = SUBPAGE_IDX(addr);
3042
    MemoryRegionSection *section;
3043
#if defined(DEBUG_SUBPAGE)
3044
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3045
           " idx %d value %"PRIx64"\n",
3046
           __func__, mmio, len, addr, idx, value);
3047
#endif
3048

    
3049
    section = &phys_sections[mmio->sub_section[idx]];
3050
    addr += mmio->base;
3051
    addr -= section->offset_within_address_space;
3052
    addr += section->offset_within_region;
3053
    io_mem_write(section->mr, addr, value, len);
3054
}
3055

    
3056
static const MemoryRegionOps subpage_ops = {
3057
    .read = subpage_read,
3058
    .write = subpage_write,
3059
    .endianness = DEVICE_NATIVE_ENDIAN,
3060
};
3061

    
3062
static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3063
                                 unsigned size)
3064
{
3065
    ram_addr_t raddr = addr;
3066
    void *ptr = qemu_get_ram_ptr(raddr);
3067
    switch (size) {
3068
    case 1: return ldub_p(ptr);
3069
    case 2: return lduw_p(ptr);
3070
    case 4: return ldl_p(ptr);
3071
    default: abort();
3072
    }
3073
}
3074

    
3075
static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3076
                              uint64_t value, unsigned size)
3077
{
3078
    ram_addr_t raddr = addr;
3079
    void *ptr = qemu_get_ram_ptr(raddr);
3080
    switch (size) {
3081
    case 1: return stb_p(ptr, value);
3082
    case 2: return stw_p(ptr, value);
3083
    case 4: return stl_p(ptr, value);
3084
    default: abort();
3085
    }
3086
}
3087

    
3088
static const MemoryRegionOps subpage_ram_ops = {
3089
    .read = subpage_ram_read,
3090
    .write = subpage_ram_write,
3091
    .endianness = DEVICE_NATIVE_ENDIAN,
3092
};
3093

    
3094
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    if (memory_region_is_ram(phys_sections[section].mr)) {
        MemoryRegionSection new_section = phys_sections[section];
        new_section.mr = &io_mem_subpage_ram;
        section = phys_section_add(&new_section);
    }
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(target_phys_addr_t base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->base = base;
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);

    return mmio;
}
3137

    
3138
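/* Register a catch-all section that maps the entire address space to the
   given memory region and return its phys_sections index. */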
static uint16_t dummy_section(MemoryRegion *mr)
3139
{
3140
    MemoryRegionSection section = {
3141
        .mr = mr,
3142
        .offset_within_address_space = 0,
3143
        .offset_within_region = 0,
3144
        .size = UINT64_MAX,
3145
    };
3146

    
3147
    return phys_section_add(&section);
3148
}
3149

    
3150
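/* Map an iotlb value (a phys_sections index, as produced by
   memory_region_section_get_iotlb) back to its MemoryRegion. */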
MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3151
{
3152
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3153
}
3154

    
3155
static void io_mem_init(void)
3156
{
3157
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3158
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3159
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3160
                          "unassigned", UINT64_MAX);
3161
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3162
                          "notdirty", UINT64_MAX);
3163
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3164
                          "subpage-ram", UINT64_MAX);
3165
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3166
                          "watch", UINT64_MAX);
3167
}
3168

    
3169
static void core_begin(MemoryListener *listener)
3170
{
3171
    destroy_all_mappings();
3172
    phys_sections_clear();
3173
    phys_map.ptr = PHYS_MAP_NODE_NIL;
3174
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
3175
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
3176
    phys_section_rom = dummy_section(&io_mem_rom);
3177
    phys_section_watch = dummy_section(&io_mem_watch);
3178
}
3179

    
3180
static void core_commit(MemoryListener *listener)
3181
{
3182
    CPUArchState *env;
3183

    
3184
    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}

static void core_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    cpu_register_physical_memory_log(section, section->readonly);
}

static void core_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
{
}

static void core_region_nop(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    cpu_register_physical_memory_log(section, section->readonly);
}

static void core_log_start(MemoryListener *listener,
                           MemoryRegionSection *section)
{
}

static void core_log_stop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void core_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static void core_eventfd_add(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool match_data, uint64_t data, EventNotifier *e)
{
}

static void core_eventfd_del(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool match_data, uint64_t data, EventNotifier *e)
{
}

static void io_begin(MemoryListener *listener)
{
}

static void io_commit(MemoryListener *listener)
{
}

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

static void io_region_nop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void io_log_start(MemoryListener *listener,
                         MemoryRegionSection *section)
{
}

static void io_log_stop(MemoryListener *listener,
                        MemoryRegionSection *section)
{
}

static void io_log_sync(MemoryListener *listener,
                        MemoryRegionSection *section)
{
}

static void io_log_global_start(MemoryListener *listener)
{
}

static void io_log_global_stop(MemoryListener *listener)
{
}

static void io_eventfd_add(MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool match_data, uint64_t data, EventNotifier *e)
{
}

static void io_eventfd_del(MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool match_data, uint64_t data, EventNotifier *e)
{
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .region_add = core_region_add,
    .region_del = core_region_del,
    .region_nop = core_region_nop,
    .log_start = core_log_start,
    .log_stop = core_log_stop,
    .log_sync = core_log_sync,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .eventfd_add = core_eventfd_add,
    .eventfd_del = core_eventfd_del,
    .priority = 0,
};

static MemoryListener io_memory_listener = {
    .begin = io_begin,
    .commit = io_commit,
    .region_add = io_region_add,
    .region_del = io_region_del,
    .region_nop = io_region_nop,
    .log_start = io_log_start,
    .log_stop = io_log_stop,
    .log_sync = io_log_sync,
    .log_global_start = io_log_global_start,
    .log_global_stop = io_log_global_stop,
    .eventfd_add = io_eventfd_add,
    .eventfd_del = io_eventfd_del,
    .priority = 0,
};

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    set_system_memory_map(system_memory);

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    set_system_io_map(system_io);

    memory_listener_register(&core_memory_listener, system_memory);
    memory_listener_register(&io_memory_listener, system_io);
}
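
/* memory_map_init() creates the flat "system" and "io" address spaces and
 * attaches the two listeners above to them.  Note that every callback slot in
 * those listeners is filled in, including the no-op stubs.  Other subsystems
 * can observe the same topology changes by registering their own listener
 * against an address space; an illustrative sketch (my_listener is an assumed
 * name, not part of exec.c):
 *
 *     memory_listener_register(&my_listener, system_memory);
 */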

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(target_phys_addr_t addr,
                                     target_phys_addr_t length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}
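
/* Note: invalidate_and_set_dirty() takes a ram_addr_t-style offset (callers
 * below pass memory_region_get_ram_addr() plus the offset within the
 * section).  It throws away any translated code covering the written range,
 * sets every dirty flag except CODE_DIRTY_FLAG, and reports the modified
 * range to Xen via xen_modified_memory(). */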

void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                            int len, int is_write)
{
    int l;
    uint8_t *ptr;
    uint32_t val;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (is_write) {
            if (!memory_region_is_ram(section->mr)) {
                target_phys_addr_t addr1;
                addr1 = memory_region_section_addr(section, addr);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                    l = 2;
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                    l = 1;
                }
            } else if (!section->readonly) {
                ram_addr_t addr1;
                addr1 = memory_region_get_ram_addr(section->mr)
                    + memory_region_section_addr(section, addr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
                qemu_put_ram_ptr(ptr);
            }
        } else {
            if (!(memory_region_is_ram(section->mr) ||
                  memory_region_is_romd(section->mr))) {
                target_phys_addr_t addr1;
                /* I/O case */
                addr1 = memory_region_section_addr(section, addr);
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                    l = 2;
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                    l = 1;
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
                                       + memory_region_section_addr(section,
                                                                    addr));
                memcpy(buf, ptr, l);
                qemu_put_ram_ptr(ptr);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
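
/* Illustrative usage (not part of exec.c): callers normally go through the
 * cpu_physical_memory_read()/cpu_physical_memory_write() wrappers rather than
 * calling cpu_physical_memory_rw() directly.  A DMA-style copy of a guest
 * descriptor at an assumed guest-physical address desc_gpa might look like:
 *
 *     uint8_t desc[16];
 *     cpu_physical_memory_read(desc_gpa, desc, sizeof(desc));
 *     ...
 *     cpu_physical_memory_write(desc_gpa, desc, sizeof(desc));
 *
 * Accesses that hit MMIO are split into the aligned 4/2/1-byte
 * io_mem_read()/io_mem_write() calls seen above.
 */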

/* used for ROM loading: can write in RAM and ROM */
void cpu_physical_memory_write_rom(target_phys_addr_t addr,
                                   const uint8_t *buf, int len)
{
    int l;
    uint8_t *ptr;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr))) {
            /* do nothing */
        } else {
            unsigned long addr1;
            addr1 = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
            qemu_put_ram_ptr(ptr);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
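
/* Illustrative usage (not part of exec.c): firmware loaders copy an image
 * into memory that the guest sees as read-only, which cpu_physical_memory_rw()
 * would silently skip because of its section->readonly check.
 * bios_base, bios_data and bios_size are assumed example names:
 *
 *     cpu_physical_memory_write_rom(bios_base, bios_data, bios_size);
 */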

typedef struct {
    void *buffer;
    target_phys_addr_t addr;
    target_phys_addr_t len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}
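
/* Illustrative sketch of the map-client protocol (not part of exec.c): a
 * caller whose cpu_physical_memory_map() attempt failed because the single
 * bounce buffer was busy registers a callback and retries when it fires.
 * MyDevice, dev, start_dma(), gpa, len and is_write are assumed names:
 *
 *     static void retry_dma(void *opaque)
 *     {
 *         MyDevice *dev = opaque;
 *         start_dma(dev);          // the bounce buffer has been released
 *     }
 *
 *     if (!cpu_physical_memory_map(gpa, &len, is_write)) {
 *         cpu_register_map_client(dev, retry_dma);
 *     }
 *
 * cpu_notify_map_clients() unregisters each client after calling it, so a
 * callback that still cannot map must re-register itself.
 */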

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *cpu_physical_memory_map(target_phys_addr_t addr,
                              target_phys_addr_t *plen,
                              int is_write)
{
    target_phys_addr_t len = *plen;
    target_phys_addr_t todo = 0;
    int l;
    target_phys_addr_t page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                cpu_physical_memory_read(addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                               int is_write, target_phys_addr_t access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}
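
/* Illustrative map/unmap pattern (not part of exec.c); gpa, size and buf are
 * assumed example names.  The returned length must be checked because only
 * part of the requested range may have been mapped, and the unmap call must
 * report how much was actually written:
 *
 *     target_phys_addr_t mlen = size;
 *     void *host = cpu_physical_memory_map(gpa, &mlen, 1);
 *     if (host) {
 *         memcpy(host, buf, mlen);
 *         cpu_physical_memory_unmap(host, mlen, 1, mlen);
 *     } else {
 *         // fall back to cpu_physical_memory_write(), or register a map
 *         // client and retry later
 *     }
 */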

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(target_phys_addr_t addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
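
/* Illustrative usage (not part of exec.c): device models pick the variant
 * matching the device's byte order rather than the guest CPU's.  Reading a
 * 32-bit and a 64-bit little-endian field at an assumed address desc_gpa:
 *
 *     uint32_t flags = ldl_le_phys(desc_gpa + 4);
 *     uint64_t next  = ldq_le_phys(desc_gpa + 8);
 *
 * The plain ldl_phys()/ldq_phys()/lduw_phys() forms use DEVICE_NATIVE_ENDIAN,
 * i.e. the target CPU's byte order, and all of them require naturally
 * aligned addresses as the warnings above note.
 */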

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
                               & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
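
/* Illustrative usage (not part of exec.c): the store helpers mirror the load
 * helpers above.  Writing a 16-bit big-endian status word and a 64-bit
 * little-endian pointer at assumed guest-physical addresses:
 *
 *     stw_be_phys(status_gpa, 0x0001);
 *     stq_le_phys(ring_gpa, desc_addr);
 *
 * The *_notdirty variants skip dirty tracking and TB invalidation (except
 * while migration is active) and are intended for softmmu-internal updates
 * such as page-table accessed/dirty bits, not for device emulation.
 */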

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
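
/* Illustrative usage (not part of exec.c): debugger-style callers such as the
 * gdb stub resolve guest-virtual addresses through the current page tables
 * and then use the physical accessors.  env and guest_va are assumed names:
 *
 *     uint32_t word;
 *     if (cpu_memory_rw_debug(env, guest_va, (uint8_t *)&word,
 *                             sizeof(word), 0) < 0) {
 *         // the address is not mapped in the guest
 *     }
 *
 * Writes go through cpu_physical_memory_write_rom(), so, for example,
 * software breakpoints can be planted even in read-only mappings.
 */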
#endif

/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
            && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}

#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
            cross_page,
            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
{
    MemoryRegionSection *section;

    section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);

    return !(memory_region_is_ram(section->mr) ||
             memory_region_is_romd(section->mr));
}
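
/* Illustrative usage (not part of exec.c): callers can use this predicate to
 * skip MMIO pages before copying guest memory wholesale.  gpa and buf are
 * assumed names:
 *
 *     if (!cpu_physical_memory_is_io(gpa)) {
 *         cpu_physical_memory_read(gpa, buf, TARGET_PAGE_SIZE);
 *     }
 */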
#endif