/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "osdep.h"
#include "kvm.h"
#include "hw/xen.h"
#include "qemu-timer.h"
#include "memory.h"
#include "exec-memory.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/time.h>
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else /* !CONFIG_USER_ONLY */
#include "xen-mapcache.h"
#include "trace.h"
#endif

#include "cputlb.h"

#define WANT_EXEC_OBSOLETE
#include "exec-obsolete.h"

//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
//#define DEBUG_UNASSIGNED

/* make various TB consistency checks */
//#define DEBUG_TB_CHECK

//#define DEBUG_IOPORT
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation.  */
#undef DEBUG_TB_CHECK
#endif

#define SMC_BITMAP_USE_THRESHOLD 10

static TranslationBlock *tbs;
static int code_gen_max_blocks;
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;

#if defined(__arm__) || defined(__sparc_v9__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
 have limited branch ranges (possibly also PPC) so place it in a
 section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32) && !defined(_WIN64)
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif

uint8_t code_gen_prologue[1024] code_gen_section;
static uint8_t *code_gen_buffer;
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
static uint8_t *code_gen_ptr;

#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
static MemoryRegion io_mem_subpage_ram;

#endif

CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount = 0;

typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap */
    unsigned int code_write_count;
    uint8_t *code_bitmap;
#if defined(CONFIG_USER_ONLY)
    unsigned long flags;
#endif
} PageDesc;

/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses.  */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)

#define P_L2_LEVELS \
    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)

/* The bits remaining after N lower levels of page tables.  */
#define V_L1_BITS_REM \
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)

#if V_L1_BITS_REM < 4
#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS  V_L1_BITS_REM
#endif

#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)

#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
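
/* Editor's note -- illustrative arithmetic only, not part of the original
 * source.  Assuming, purely as an example, a user-mode build with
 * TARGET_VIRT_ADDR_SPACE_BITS = 47 and TARGET_PAGE_BITS = 12:
 *   L1_MAP_ADDR_SPACE_BITS = 47
 *   V_L1_BITS_REM = (47 - 12) % 10 = 5, so V_L1_BITS = 5 and V_L1_SIZE = 32
 *   V_L1_SHIFT = 47 - 12 - 5 = 30, i.e. three further levels of 10 bits each
 * A page index is therefore split into one 5-bit l1_map index followed by
 * three 10-bit table indexes, which page_find_alloc() below walks level by
 * level.
 */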

uintptr_t qemu_real_host_page_size;
uintptr_t qemu_host_page_size;
uintptr_t qemu_host_page_mask;

/* This is a multi-level map on the virtual address space.
   The bottom level has pointers to PageDesc.  */
static void *l1_map[V_L1_SIZE];

#if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

static MemoryRegionSection *phys_sections;
static unsigned phys_sections_nb, phys_sections_nb_alloc;
static uint16_t phys_section_unassigned;
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

struct PhysPageEntry {
    uint16_t is_leaf : 1;
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

/* This is a multi-level map on the physical address space.
   The bottom level has pointers to MemoryRegionSections.  */
static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };

static void io_mem_init(void);
static void memory_map_init(void);

static MemoryRegion io_mem_watch;
#endif

/* statistics */
static int tb_flush_count;
static int tb_phys_invalidate_count;

#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD old_protect;
    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);

}
#else
static void map_exec(void *addr, long size)
{
    unsigned long start, end, page_size;

    page_size = getpagesize();
    start = (unsigned long)addr;
    start &= ~(page_size - 1);

    end = (unsigned long)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    mprotect((void *)start, end - start,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
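
/* Editor's note -- worked example, not from the original source: with a
 * 4 KiB host page size, map_exec(addr = 0x1234, size = 0x100) rounds the
 * start down to 0x1000 and the end (0x1334) up to 0x2000, so the whole
 * page range [0x1000, 0x2000) is re-protected RWX in one mprotect() call.
 */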
#endif

static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}

static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

#if defined(CONFIG_USER_ONLY)
    /* We can't use g_malloc because it may recurse into a locked mutex. */
# define ALLOC(P, SIZE)                                 \
    do {                                                \
        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
    } while (0)
#else
# define ALLOC(P, SIZE) \
    do { P = g_malloc0(SIZE); } while (0)
#endif

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));

    /* Level 2..N-1.  */
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        void **p = *lp;

        if (p == NULL) {
            if (!alloc) {
                return NULL;
            }
            ALLOC(p, sizeof(void *) * L2_SIZE);
            *lp = p;
        }

        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }

    pd = *lp;
    if (pd == NULL) {
        if (!alloc) {
            return NULL;
        }
        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
        *lp = pd;
    }

#undef ALLOC

    return pd + (index & (L2_SIZE - 1));
}

static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(unsigned nodes)
{
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
        typedef PhysPageEntry Node[L2_SIZE];
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
                                      phys_map_nodes_nb + nodes);
        phys_map_nodes = g_renew(Node, phys_map_nodes,
                                 phys_map_nodes_nb_alloc);
    }
}

static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = phys_map_nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != phys_map_nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        phys_map_nodes[ret][i].is_leaf = 0;
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}


static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
                                target_phys_addr_t *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

MemoryRegionSection *phys_page_find(target_phys_addr_t index)
{
    PhysPageEntry lp = phys_map;
    PhysPageEntry *p;
    int i;
    uint16_t s_index = phys_section_unassigned;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            goto not_found;
        }
        p = phys_map_nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }

    s_index = lp.ptr;
not_found:
    return &phys_sections[s_index];
}
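
/* Editor's note -- illustrative sketch only, not part of the original file.
 * phys_page_set() records that 'nb' consecutive target pages starting at
 * page index 'index' map to one phys_sections[] entry; phys_page_find()
 * walks the radix tree back down, consuming L2_BITS of the index per level
 * and falling back to phys_section_unassigned for unmapped pages.  The
 * function name, page indices and section id below are made up.
 */
#if 0
static void phys_map_example(void)
{
    uint16_t section = phys_section_rom;               /* any registered section id */
    phys_page_set(0x10000, 16, section);               /* pages 0x10000..0x1000f */
    MemoryRegionSection *s = phys_page_find(0x10005);  /* -> &phys_sections[section] */
    (void)s;
}
#endif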

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_ram && mr != &io_mem_rom
        && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

#define mmap_lock() do { } while(0)
#define mmap_unlock() do { } while(0)
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc is used. */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif

static void code_gen_alloc(unsigned long tb_size)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        start = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Keep the buffer no bigger than 16MB to branch between blocks */
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        start = (void *)0x90000000UL;
#endif
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        addr = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}

/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    code_gen_alloc(tb_size);
    code_gen_ptr = code_gen_buffer;
    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now.  */
    tcg_prologue_init(&tcg_ctx);
#endif
}
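
/* Editor's note -- illustrative sketch, not part of the original file.
 * A front end is expected to call tcg_exec_init() once before translating
 * anything; passing 0 keeps the default code buffer size computed above.
 * The function name below is hypothetical.
 */
#if 0
static void translator_setup_example(void)
{
    tcg_exec_init(0);           /* 0 = default translation buffer size */
    assert(tcg_enabled());      /* code_gen_buffer is now allocated */
}
#endif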

bool tcg_enabled(void)
{
    return code_gen_buffer != NULL;
}

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}

#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}

static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
#endif

CPUArchState *qemu_get_cpu(int cpu)
{
    CPUArchState *env = first_cpu;

    while (env) {
        if (env->cpu_index == cpu)
            break;
        env = env->next_cpu;
    }

    return env;
}

void cpu_exec_init(CPUArchState *env)
{
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    env->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}

/* Allocate a new translation block. Flush the translation buffer if
   too many translation blocks or too much generated code. */
static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    if (nb_tbs >= code_gen_max_blocks ||
        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
        return NULL;
    tb = &tbs[nb_tbs++];
    tb->pc = pc;
    tb->cflags = 0;
    return tb;
}

void tb_free(TranslationBlock *tb)
{
    /* In practice this is mostly used for single use temporary TBs.
       Ignore the hard cases and just back up if this TB happens to
       be the last one generated.  */
    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
        code_gen_ptr = tb->tc_ptr;
        nb_tbs--;
    }
}

static inline void invalidate_page_bitmap(PageDesc *p)
{
    if (p->code_bitmap) {
        g_free(p->code_bitmap);
        p->code_bitmap = NULL;
    }
    p->code_write_count = 0;
}

/* Set to NULL all the 'first_tb' fields in all PageDescs. */

static void page_flush_tb_1 (int level, void **lp)
{
    int i;

    if (*lp == NULL) {
        return;
    }
    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pd[i].first_tb = NULL;
            invalidate_page_bitmap(pd + i);
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            page_flush_tb_1 (level - 1, pp + i);
        }
    }
}

static void page_flush_tb(void)
{
    int i;
    for (i = 0; i < V_L1_SIZE; i++) {
        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
    }
}

/* flush all the translation blocks */
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUArchState *env1)
{
    CPUArchState *env;
#if defined(DEBUG_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(code_gen_ptr - code_gen_buffer),
           nb_tbs, nb_tbs > 0 ?
           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
#endif
    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
        cpu_abort(env1, "Internal error: code buffer overflow\n");

    nb_tbs = 0;

    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
    }

    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
    page_flush_tb();

    code_gen_ptr = code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    tb_flush_count++;
}

#ifdef DEBUG_TB_CHECK

static void tb_invalidate_check(target_ulong address)
{
    TranslationBlock *tb;
    int i;
    address &= TARGET_PAGE_MASK;
    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                  address >= tb->pc + tb->size)) {
                printf("ERROR invalidate: address=" TARGET_FMT_lx
                       " PC=%08lx size=%04x\n",
                       address, (long)tb->pc, tb->size);
            }
        }
    }
}

/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
    TranslationBlock *tb;
    int i, flags1, flags2;

    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            flags1 = page_get_flags(tb->pc);
            flags2 = page_get_flags(tb->pc + tb->size - 1);
            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
                       (long)tb->pc, tb->size, flags1, flags2);
            }
        }
    }
}

#endif

/* invalidate one TB */
static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
                             int next_offset)
{
    TranslationBlock *tb1;
    for(;;) {
        tb1 = *ptb;
        if (tb1 == tb) {
            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
            break;
        }
        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
    }
}
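
/* Editor's note (added commentary, not in the original source): the per-page
 * and per-jump TB lists below store tagged pointers.  The low two bits of
 * each stored TranslationBlock pointer say which slot of the pointed-to TB
 * the chain continues through: 0 or 1 selects page_next[n] / jmp_next[n],
 * while the value 2 marks the head of the circular jump list (the owning TB
 * itself, see tb->jmp_first = tb | 2 in tb_link_page()).  Hence the
 * recurring "n = (uintptr_t)tb & 3; tb = tb & ~3" idiom in the helpers
 * below.
 */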

static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    unsigned int n1;

    for(;;) {
        tb1 = *ptb;
        n1 = (uintptr_t)tb1 & 3;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        if (tb1 == tb) {
            *ptb = tb1->page_next[n1];
            break;
        }
        ptb = &tb1->page_next[n1];
    }
}

static inline void tb_jmp_remove(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, **ptb;
    unsigned int n1;

    ptb = &tb->jmp_next[n];
    tb1 = *ptb;
    if (tb1) {
        /* find tb(n) in circular list */
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            if (n1 == 2) {
                ptb = &tb1->jmp_first;
            } else {
                ptb = &tb1->jmp_next[n1];
            }
        }
        /* now we can suppress tb(n) from the list */
        *ptb = tb->jmp_next[n];

        tb->jmp_next[n] = NULL;
    }
}

/* reset the jump entry 'n' of a TB so that it is not chained to
   another TB */
static inline void tb_reset_jump(TranslationBlock *tb, int n)
{
    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
}

void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUArchState *env;
    PageDesc *p;
    unsigned int h, n1;
    tb_page_addr_t phys_pc;
    TranslationBlock *tb1, *tb2;

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_remove(&tb_phys_hash[h], tb,
              offsetof(TranslationBlock, phys_hash_next));

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;

    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->tb_jmp_cache[h] == tb)
            env->tb_jmp_cache[h] = NULL;
    }

    /* suppress this TB from the two jump lists */
    tb_jmp_remove(tb, 0);
    tb_jmp_remove(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb1 = tb->jmp_first;
    for(;;) {
        n1 = (uintptr_t)tb1 & 3;
        if (n1 == 2)
            break;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        tb2 = tb1->jmp_next[n1];
        tb_reset_jump(tb1, n1);
        tb1->jmp_next[n1] = NULL;
        tb1 = tb2;
    }
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
}

static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
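
/* Editor's note -- worked example, not from the original source:
 * set_bits(tab, 5, 7) marks bits 5..11.  start and end straddle a byte
 * boundary, so the else branch runs: tab[0] |= 0xe0 (bits 5-7), the
 * whole-byte loop is skipped (end1 == 8 == start), and the tail sets
 * tab[1] |= 0x0f (bits 8-11).
 */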

static void build_page_bitmap(PageDesc *p)
{
    int n, tb_start, tb_end;
    TranslationBlock *tb;

    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);

    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->pc & ~TARGET_PAGE_MASK;
            tb_end = tb_start + tb->size;
            if (tb_end > TARGET_PAGE_SIZE)
                tb_end = TARGET_PAGE_SIZE;
        } else {
            tb_start = 0;
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
        tb = tb->page_next[n];
    }
}

TranslationBlock *tb_gen_code(CPUArchState *env,
                              target_ulong pc, target_ulong cs_base,
                              int flags, int cflags)
{
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    int code_gen_size;

    phys_pc = get_page_addr_code(env, pc);
    tb = tb_alloc(pc);
    if (!tb) {
        /* flush must be done */
        tb_flush(env);
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        /* Don't forget to invalidate previous TB info.  */
        tb_invalidated_flag = 1;
    }
    tc_ptr = code_gen_ptr;
    tb->tc_ptr = tc_ptr;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    cpu_gen_code(env, tb, &code_gen_size);
    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    tb_link_page(tb, phys_pc, phys_page2);
    return tb;
}
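
/* Editor's note (added commentary, not in the original source): the low
 * CF_COUNT_MASK bits of 'cflags' bound how many guest instructions the new
 * block may contain; the self-modifying-code paths below call
 * tb_gen_code(env, pc, cs_base, flags, 1) to regenerate a block holding
 * just the single instruction that performed the write.
 */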

/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end may refer to *different* physical pages.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
                              int is_cpu_write_access)
{
    while (start < end) {
        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
        start &= TARGET_PAGE_MASK;
        start += TARGET_PAGE_SIZE;
    }
}

/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end must refer to the *same* physical page.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
{
    TranslationBlock *tb, *tb_next, *saved_tb;
    CPUArchState *env = cpu_single_env;
    tb_page_addr_t tb_start, tb_end;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    int current_tb_not_found = is_cpu_write_access;
    TranslationBlock *current_tb = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
        is_cpu_write_access) {
        /* build code bitmap */
        build_page_bitmap(p);
    }

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all the code */
    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        tb_next = tb->page_next[n];
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = 0;
                current_tb = NULL;
                if (env->mem_io_pc) {
                    /* now we have a real cpu fault */
                    current_tb = tb_find_pc(env->mem_io_pc);
                }
            }
            if (current_tb == tb &&
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                its execution. We could be more precise by checking
                that the modification is after the current PC, but it
                would require a specialized function to partially
                restore the CPU state */

                current_tb_modified = 1;
                cpu_restore_state(current_tb, env, env->mem_io_pc);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            /* we need to do that to handle the case where a signal
               occurs while doing tb_phys_invalidate() */
            saved_tb = NULL;
            if (env) {
                saved_tb = env->current_tb;
                env->current_tb = NULL;
            }
            tb_phys_invalidate(tb, -1);
            if (env) {
                env->current_tb = saved_tb;
                if (env->interrupt_request && env->current_tb)
                    cpu_interrupt(env, env->interrupt_request);
            }
        }
        tb = tb_next;
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        if (is_cpu_write_access) {
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
        }
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, NULL);
    }
#endif
}

/* len must be <= 8 and start must be a multiple of len */
static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
    PageDesc *p;
    int offset, b;
#if 0
    if (1) {
        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
                  cpu_single_env->mem_io_vaddr, len,
                  cpu_single_env->eip,
                  cpu_single_env->eip +
                  (intptr_t)cpu_single_env->segs[R_CS].base);
    }
#endif
    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (p->code_bitmap) {
        offset = start & ~TARGET_PAGE_MASK;
        b = p->code_bitmap[offset >> 3] >> (offset & 7);
        if (b & ((1 << len) - 1))
            goto do_invalidate;
    } else {
    do_invalidate:
        tb_invalidate_phys_page_range(start, start + len, 1);
    }
}

#if !defined(CONFIG_SOFTMMU)
static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                    uintptr_t pc, void *puc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUArchState *env = cpu_single_env;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p)
        return;
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
        current_tb = tb_find_pc(pc);
    }
#endif
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                   its execution. We could be more precise by checking
                   that the modification is after the current PC, but it
                   would require a specialized function to partially
                   restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state(current_tb, env, pc);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        tb_phys_invalidate(tb, addr);
        tb = tb->page_next[n];
    }
    p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, puc);
    }
#endif
}
#endif

/* add the tb in the target page and protect it if necessary */
static inline void tb_alloc_page(TranslationBlock *tb,
                                 unsigned int n, tb_page_addr_t page_addr)
{
    PageDesc *p;
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    page_already_protected = p->first_tb != NULL;
#endif
    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
    invalidate_page_bitmap(p);

#if defined(TARGET_HAS_SMC) || 1

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find (addr >> TARGET_PAGE_BITS);
            if (!p2)
                continue;
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
          }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
#ifdef DEBUG_TB_INVALIDATE
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
               page_addr);
#endif
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif

#endif /* TARGET_HAS_SMC */
}

/* add a new TB and link it to the physical page tables. phys_page2 is
   (-1) to indicate that only one page contains the TB. */
void tb_link_page(TranslationBlock *tb,
                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
{
    unsigned int h;
    TranslationBlock **ptb;

    /* Grab the mmap lock to stop another thread invalidating this TB
       before we are done.  */
    mmap_lock();
    /* add in the physical hash table */
    h = tb_phys_hash_func(phys_pc);
    ptb = &tb_phys_hash[h];
    tb->phys_hash_next = *ptb;
    *ptb = tb;

    /* add in the page list */
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (phys_page2 != -1)
        tb_alloc_page(tb, 1, phys_page2);
    else
        tb->page_addr[1] = -1;

    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
    tb->jmp_next[0] = NULL;
    tb->jmp_next[1] = NULL;

    /* init original jump addresses */
    if (tb->tb_next_offset[0] != 0xffff)
        tb_reset_jump(tb, 0);
    if (tb->tb_next_offset[1] != 0xffff)
        tb_reset_jump(tb, 1);

#ifdef DEBUG_TB_CHECK
    tb_page_check();
#endif
    mmap_unlock();
}

/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
   tb[1].tc_ptr. Return NULL if not found */
TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
{
    int m_min, m_max, m;
    uintptr_t v;
    TranslationBlock *tb;

    if (nb_tbs <= 0)
        return NULL;
    if (tc_ptr < (uintptr_t)code_gen_buffer ||
        tc_ptr >= (uintptr_t)code_gen_ptr) {
        return NULL;
    }
    /* binary search (cf Knuth) */
    m_min = 0;
    m_max = nb_tbs - 1;
    while (m_min <= m_max) {
        m = (m_min + m_max) >> 1;
        tb = &tbs[m];
        v = (uintptr_t)tb->tc_ptr;
        if (v == tc_ptr)
            return tb;
        else if (tc_ptr < v) {
            m_max = m - 1;
        } else {
            m_min = m + 1;
        }
    }
    return &tbs[m_max];
}

static void tb_reset_jump_recursive(TranslationBlock *tb);

static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, *tb_next, **ptb;
    unsigned int n1;

    tb1 = tb->jmp_next[n];
    if (tb1 != NULL) {
        /* find head of list */
        for(;;) {
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == 2)
                break;
            tb1 = tb1->jmp_next[n1];
        }
        /* we are now sure that tb jumps to tb1 */
        tb_next = tb1;

        /* remove tb from the jmp_first list */
        ptb = &tb_next->jmp_first;
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            ptb = &tb1->jmp_next[n1];
        }
        *ptb = tb->jmp_next[n];
        tb->jmp_next[n] = NULL;

        /* suppress the jump to next tb in generated code */
        tb_reset_jump(tb, n);

        /* suppress jumps in the tb on which we could have jumped */
        tb_reset_jump_recursive(tb_next);
    }
}

static void tb_reset_jump_recursive(TranslationBlock *tb)
{
    tb_reset_jump_recursive2(tb, 0);
    tb_reset_jump_recursive2(tb, 1);
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
void tb_invalidate_phys_addr(target_phys_addr_t addr)
{
    ram_addr_t ram_addr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!(memory_region_is_ram(section->mr)
          || (section->mr->rom_device && section->mr->readable))) {
        return;
    }
    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        + memory_region_section_addr(section, addr);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}

static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)

{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}
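
/* Editor's note -- illustrative sketch only, not part of the original file.
 * 'len' must be a power of two no larger than TARGET_PAGE_SIZE and 'addr'
 * must be len-aligned, or the function rejects the request with -EINVAL.
 * The address, length and helper name below are made up; BP_MEM_WRITE is
 * assumed to be the write-access flag from cpu-all.h.
 */
#if 0
static void watchpoint_example(CPUArchState *env)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(env, 0x2000, 4, BP_GDB | BP_MEM_WRITE, &wp) == 0) {
        /* ... run guest code; writes to the watched range now take the
           slow path and raise a debug exception ... */
        cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif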
1537

    
1538
/* Remove a specific watchpoint.  */
1539
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1540
                          int flags)
1541
{
1542
    target_ulong len_mask = ~(len - 1);
1543
    CPUWatchpoint *wp;
1544

    
1545
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1546
        if (addr == wp->vaddr && len_mask == wp->len_mask
1547
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1548
            cpu_watchpoint_remove_by_ref(env, wp);
1549
            return 0;
1550
        }
1551
    }
1552
    return -ENOENT;
1553
}
1554

    
1555
/* Remove a specific watchpoint by reference.  */
1556
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1557
{
1558
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1559

    
1560
    tlb_flush_page(env, watchpoint->vaddr);
1561

    
1562
    g_free(watchpoint);
1563
}
1564

    
1565
/* Remove all matching watchpoints.  */
1566
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1567
{
1568
    CPUWatchpoint *wp, *next;
1569

    
1570
    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1571
        if (wp->flags & mask)
1572
            cpu_watchpoint_remove_by_ref(env, wp);
1573
    }
1574
}
1575
#endif
1576

    
1577
/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);

    breakpoint_invalidate(env, pc);

    if (breakpoint)
        *breakpoint = bp;
    return 0;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(env, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(env, breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUArchState *env, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (env->singlestep_enabled != enabled) {
        env->singlestep_enabled = enabled;
        if (kvm_enabled())
            kvm_update_guest_debug(env, 0);
        else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(env);
        }
    }
#endif
}

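/* Force the CPU out of the translation block it is currently executing:
   clear env->current_tb and reset the chained jumps of that TB so control
   returns to the main execution loop as soon as possible. */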
static void cpu_unlink_tb(CPUArchState *env)
{
    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
       problem and hope the cpu will stop of its own accord.  For userspace
       emulation this often isn't actually as bad as it sounds.  Often
       signals are used primarily to interrupt blocking syscalls.  */
    TranslationBlock *tb;
    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;

    spin_lock(&interrupt_lock);
    tb = env->current_tb;
    /* if the cpu is currently executing code, we must unlink it and
       all the potentially executing TB */
    if (tb) {
        env->current_tb = NULL;
        tb_reset_jump_recursive(tb);
    }
    spin_unlock(&interrupt_lock);
}

#ifndef CONFIG_USER_ONLY
/* mask must never be zero, except for A20 change call */
static void tcg_handle_interrupt(CPUArchState *env, int mask)
{
    int old_mask;

    old_mask = env->interrupt_request;
    env->interrupt_request |= mask;

    /*
     * If called from iothread context, wake the target cpu in
     * case it's halted.
     */
    if (!qemu_cpu_is_self(env)) {
        qemu_cpu_kick(env);
        return;
    }

    if (use_icount) {
        env->icount_decr.u16.high = 0xffff;
        if (!can_do_io(env)
            && (mask & ~old_mask) != 0) {
            cpu_abort(env, "Raised interrupt while not in I/O function");
        }
    } else {
        cpu_unlink_tb(env);
    }
}

CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request |= mask;
    cpu_unlink_tb(env);
}
#endif /* CONFIG_USER_ONLY */

void cpu_reset_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request &= ~mask;
}

void cpu_exit(CPUArchState *env)
{
    env->exit_request = 1;
    cpu_unlink_tb(env);
}

void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
#ifdef TARGET_I386
    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
#else
    cpu_dump_state(env, stderr, fprintf, 0);
#endif
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
#ifdef TARGET_I386
        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
#else
        log_cpu_state(env, 0);
#endif
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

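/* Create a copy of a CPU state: allocate a fresh CPUArchState, copy the
   register state wholesale, restore the list chaining and cpu_index of the
   copy, and re-insert any break/watchpoints into the new state. */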
CPUArchState *cpu_copy(CPUArchState *env)
{
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
    CPUArchState *next_cpu = new_env->next_cpu;
    int cpu_index = new_env->cpu_index;
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;
    CPUWatchpoint *wp;
#endif

    memcpy(new_env, env, sizeof(CPUArchState));

    /* Preserve chaining and index. */
    new_env->next_cpu = next_cpu;
    new_env->cpu_index = cpu_index;

    /* Clone all break/watchpoints.
       Note: Once we support ptrace with hw-debug register access, make sure
       BP_CPU break/watchpoints are handled correctly on clone. */
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#if defined(TARGET_HAS_ICE)
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
    }
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
                              wp->flags, NULL);
    }
#endif

    return new_env;
}

#if !defined(CONFIG_USER_ONLY)
void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
{
    unsigned int i;

    /* Discard jump cache entries for any tb which might potentially
       overlap the flushed page.  */
    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
    memset (&env->tb_jmp_cache[i], 0,
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));

    i = tb_jmp_cache_hash_page(addr);
    memset (&env->tb_jmp_cache[i], 0,
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                      uintptr_t length)
{
    uintptr_t start1;

    /* we modify the TLB cache so that the dirty bit will be set again
       when accessing the range */
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
    /* Check that we don't span multiple blocks - this breaks the
       address comparisons below.  */
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
            != (end - 1) - start) {
        abort();
    }
    cpu_tlb_reset_dirty_all(start1, length);
}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                     int dirty_flags)
{
    uintptr_t length;

    start &= TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    length = end - start;
    if (length == 0)
        return;
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, end, length);
    }
}

int cpu_physical_memory_set_dirty_tracking(int enable)
{
    int ret = 0;
    in_migration = enable;
    return ret;
}

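/* Compute the iotlb value stored in a TLB entry for this mapping: for RAM
   the value is the page's ram address combined with the notdirty or ROM
   section index, for MMIO it is the index of the MemoryRegionSection plus
   the offset of paddr within that section.  Pages covered by a watchpoint
   are redirected to the watch section so that every access traps. */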
target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
                                                   MemoryRegionSection *section,
                                                   target_ulong vaddr,
                                                   target_phys_addr_t paddr,
                                                   int prot,
                                                   target_ulong *address)
{
    target_phys_addr_t iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, paddr);
        if (!section->readonly) {
            iotlb |= phys_section_notdirty;
        } else {
            iotlb |= phys_section_rom;
        }
    } else {
        /* IO handlers are currently passed a physical address.
           It would be nice to pass an offset from the base address
           of that region.  This would avoid having to special case RAM,
           and avoid full address decoding in every device.
           We can't use the high bits of pd for this because
           IO_MEM_ROMD uses these as a ram address.  */
        iotlb = section - phys_sections;
        iotlb += memory_region_section_addr(section, paddr);
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = phys_section_watch + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}

#else
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */

struct walk_memory_regions_data
{
    walk_memory_regions_fn fn;
    void *priv;
    uintptr_t start;
    int prot;
};

static int walk_memory_regions_end(struct walk_memory_regions_data *data,
                                   abi_ulong end, int new_prot)
{
    if (data->start != -1ul) {
        int rc = data->fn(data->priv, data->start, end, data->prot);
        if (rc != 0) {
            return rc;
        }
    }

    data->start = (new_prot ? end : -1ul);
    data->prot = new_prot;

    return 0;
}

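/* Recursively walk one level of the l1_map page table, merging consecutive
   pages that share the same protection bits into a single region before
   reporting it through walk_memory_regions_end(). */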
static int walk_memory_regions_1(struct walk_memory_regions_data *data,
                                 abi_ulong base, int level, void **lp)
{
    abi_ulong pa;
    int i, rc;

    if (*lp == NULL) {
        return walk_memory_regions_end(data, base, 0);
    }

    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            int prot = pd[i].flags;

            pa = base | (i << TARGET_PAGE_BITS);
            if (prot != data->prot) {
                rc = walk_memory_regions_end(data, pa, prot);
                if (rc != 0) {
                    return rc;
                }
            }
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pa = base | ((abi_ulong)i <<
                (TARGET_PAGE_BITS + L2_BITS * level));
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
            if (rc != 0) {
                return rc;
            }
        }
    }

    return 0;
}

int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    struct walk_memory_regions_data data;
    uintptr_t i;

    data.fn = fn;
    data.priv = priv;
    data.start = -1ul;
    data.prot = 0;

    for (i = 0; i < V_L1_SIZE; i++) {
        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
        if (rc != 0) {
            return rc;
        }
    }

    return walk_memory_regions_end(&data, 0, 0);
}

static int dump_region(void *priv, abi_ulong start,
    abi_ulong end, unsigned long prot)
{
    FILE *f = (FILE *)priv;

    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
        " "TARGET_ABI_FMT_lx" %c%c%c\n",
        start, end, end - start,
        ((prot & PAGE_READ) ? 'r' : '-'),
        ((prot & PAGE_WRITE) ? 'w' : '-'),
        ((prot & PAGE_EXEC) ? 'x' : '-'));

    return (0);
}

/* dump memory mappings */
void page_dump(FILE *f)
{
    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
            "start", "end", "size", "prot");
    walk_memory_regions(f, dump_region);
}

int page_get_flags(target_ulong address)
{
    PageDesc *p;

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p)
        return 0;
    return p->flags;
}

/* Modify the flags of a page and invalidate the code if necessary.
   The flag PAGE_WRITE_ORG is positioned automatically depending
   on PAGE_WRITE.  The mmap_lock should already be held.  */
void page_set_flags(target_ulong start, target_ulong end, int flags)
{
    target_ulong addr, len;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif
    assert(start < end);

    start = start & TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    if (flags & PAGE_WRITE) {
        flags |= PAGE_WRITE_ORG;
    }

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);

        /* If the write protection bit is set, then we invalidate
           the code inside.  */
        if (!(p->flags & PAGE_WRITE) &&
            (flags & PAGE_WRITE) &&
            p->first_tb) {
            tb_invalidate_phys_page(addr, 0, NULL);
        }
        p->flags = flags;
    }
}

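/* Check that the pages in [start, start + len) carry the requested
   protection flags.  Returns 0 if the access is allowed and -1 otherwise;
   pages that were write-protected because they contain translated code are
   unprotected via page_unprotect() when write permission is requested. */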
int page_check_range(target_ulong start, target_ulong len, int flags)
{
    PageDesc *p;
    target_ulong end;
    target_ulong addr;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif

    if (len == 0) {
        return 0;
    }
    if (start + len - 1 < start) {
        /* We've wrapped around.  */
        return -1;
    }

    end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
    start = start & TARGET_PAGE_MASK;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        p = page_find(addr >> TARGET_PAGE_BITS);
        if (!p)
            return -1;
        if (!(p->flags & PAGE_VALID))
            return -1;

        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
            return -1;
        if (flags & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG))
                return -1;
            /* unprotect the page if it was put read-only because it
               contains translated code */
            if (!(p->flags & PAGE_WRITE)) {
                if (!page_unprotect(addr, 0, NULL))
                    return -1;
            }
            return 0;
        }
    }
    return 0;
}

/* called from signal handler: invalidate the code and unprotect the
   page. Return TRUE if the fault was successfully handled. */
int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
{
    unsigned int prot;
    PageDesc *p;
    target_ulong host_start, host_end, addr;

    /* Technically this isn't safe inside a signal handler.  However we
       know this only ever happens in a synchronous SEGV handler, so in
       practice it seems to be ok.  */
    mmap_lock();

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        mmap_unlock();
        return 0;
    }

    /* if the page was really writable, then we change its
       protection back to writable */
    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
        host_start = address & qemu_host_page_mask;
        host_end = host_start + qemu_host_page_size;

        prot = 0;
        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
            p = page_find(addr >> TARGET_PAGE_BITS);
            p->flags |= PAGE_WRITE;
            prot |= p->flags;

            /* and since the content will be modified, we must invalidate
               the corresponding translated code. */
            tb_invalidate_phys_page(addr, pc, puc);
#ifdef DEBUG_TB_CHECK
            tb_invalidate_check(addr);
#endif
        }
        mprotect((void *)g2h(host_start), qemu_host_page_size,
                 prot & PAGE_BITS);

        mmap_unlock();
        return 1;
    }
    mmap_unlock();
    return 0;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    target_phys_addr_t base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(target_phys_addr_t base);
static void destroy_page_desc(uint16_t section_index)
{
    MemoryRegionSection *section = &phys_sections[section_index];
    MemoryRegion *mr = section->mr;

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}

static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
{
    unsigned i;
    PhysPageEntry *p;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = phys_map_nodes[lp->ptr];
    for (i = 0; i < L2_SIZE; ++i) {
        if (!p[i].is_leaf) {
            destroy_l2_mapping(&p[i], level - 1);
        } else {
            destroy_page_desc(p[i].ptr);
        }
    }
    lp->is_leaf = 0;
    lp->ptr = PHYS_MAP_NODE_NIL;
}

static void destroy_all_mappings(void)
{
    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
    phys_map_nodes_reset();
}

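/* Append a copy of *section to the global phys_sections array, growing the
   array by doubling when it is full, and return the index under which the
   copy was stored.  These indices are what the physical page map and the
   iotlb refer to. */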
static uint16_t phys_section_add(MemoryRegionSection *section)
{
    if (phys_sections_nb == phys_sections_nb_alloc) {
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
                                phys_sections_nb_alloc);
    }
    phys_sections[phys_sections_nb] = *section;
    return phys_sections_nb++;
}

static void phys_sections_clear(void)
{
    phys_sections_nb = 0;
}

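/* Register a MemoryRegionSection that covers only part of a target page.
   The page is represented by a subpage_t container; one is created on first
   use, otherwise the existing container for that page is reused and the new
   section is added to its sub_section table. */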
static void register_subpage(MemoryRegionSection *section)
{
    subpage_t *subpage;
    target_phys_addr_t base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = TARGET_PAGE_SIZE,
    };
    target_phys_addr_t start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(base);
        subsection.mr = &subpage->iomem;
        phys_page_set(base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + section->size - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
}


static void register_multipage(MemoryRegionSection *section)
{
    target_phys_addr_t start_addr = section->offset_within_address_space;
    ram_addr_t size = section->size;
    target_phys_addr_t addr;
    uint16_t section_index = phys_section_add(section);

    assert(size);

    addr = start_addr;
    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
                  section_index);
}

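/* Install a MemoryRegionSection into the physical page map.  The section is
   split into an unaligned head and tail, which are registered through the
   subpage machinery, and a page-aligned middle, which is registered as a
   full multipage mapping. */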
void cpu_register_physical_memory_log(MemoryRegionSection *section,
                                      bool readonly)
{
    MemoryRegionSection now = *section, remain = *section;

    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
        || (now.size < TARGET_PAGE_SIZE)) {
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space,
                       now.size);
        register_subpage(&now);
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    while (remain.size >= TARGET_PAGE_SIZE) {
        now = remain;
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
            now.size = TARGET_PAGE_SIZE;
            register_subpage(&now);
        } else {
            now.size &= TARGET_PAGE_MASK;
            register_multipage(&now);
        }
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    now = remain;
    if (now.size) {
        register_subpage(&now);
    }
}


void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
{
    if (kvm_enabled())
        kvm_coalesce_mmio_region(addr, size);
}

void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
{
    if (kvm_enabled())
        kvm_uncoalesce_mmio_region(addr, size);
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

#if defined(__linux__) && !defined(TARGET_S390X)

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

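/* Allocate guest RAM from a file created under -mem-path (normally a
   hugetlbfs mount): create and immediately unlink a temporary backing file,
   round the size up to a whole number of huge pages and mmap it.  Returns
   NULL on any failure so the caller can fall back to an anonymous
   allocation. */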
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    void *area;
    int fd;
#ifdef MAP_POPULATE
    int flags;
#endif
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
        return NULL;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        free(filename);
        return NULL;
    }
    unlink(filename);
    free(filename);

    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

#ifdef MAP_POPULATE
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
#else
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#endif
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return (NULL);
    }
    block->fd = fd;
    return area;
}
#endif

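/* Find a free offset in the ram_addr_t address space for a new block of the
   given size: scan the gaps between existing RAM blocks and pick the
   smallest one that is large enough (best fit).  Aborts if no gap fits. */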
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    if (QLIST_EMPTY(&ram_list.blocks))
        return 0;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

static ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    QLIST_FOREACH(block, &ram_list.blocks, next)
        last = MAX(last, block->offset + block->length);

    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;
    QemuOpts *machine_opts;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
    if (machine_opts &&
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    new_block = NULL;
    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            new_block = block;
            break;
        }
    }
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
}

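/* Allocate a new RAM block of the given size and register it in ram_list.
   If a host pointer is supplied the memory is treated as preallocated;
   otherwise it is allocated from the -mem-path backing file, via the Xen or
   KVM helpers, or with qemu_vmalloc().  The dirty bitmap is grown to cover
   the new block and the whole range is marked dirty. */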
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));

    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else {
        if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
            new_block->host = file_ram_alloc(new_block, size, mem_path);
            if (!new_block->host) {
                new_block->host = qemu_vmalloc(size);
                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
            }
#else
            fprintf(stderr, "-mem-path option unsupported\n");
            exit(1);
#endif
        } else {
            if (xen_enabled()) {
                xen_ram_alloc(new_block->offset, size, mr);
            } else if (kvm_enabled()) {
                /* some s390/kvm configurations have special constraints */
                new_block->host = kvm_vmalloc(size);
            } else {
                new_block->host = qemu_vmalloc(size);
            }
            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
        }
    }
    new_block->length = size;

    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);

    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                       last_ram_offset() >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
           0, size >> TARGET_PAGE_BITS);
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);

    qemu_ram_setup_dump(new_block->host, size);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE(block, next);
            g_free(block);
            return;
        }
    }
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE(block, next);
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
                if (block->fd) {
                    munmap(block->host, block->length);
                    close(block->fd);
                } else {
                    qemu_vfree(block->host);
                }
#else
                abort();
#endif
            } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                munmap(block->host, block->length);
#else
                if (xen_enabled()) {
                    xen_invalidate_map_cache_entry(block->host);
                } else {
                    qemu_vfree(block->host);
                }
#endif
            }
            g_free(block);
            return;
        }
    }
}

#ifndef _WIN32
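/* Re-create the host mapping for a range of guest RAM at its original
   virtual address: the old mapping is munmap()ed and replaced with a fresh
   one backed by the same file or by anonymous memory, matching the way the
   block was allocated originally. */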
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (mem_path) {
#if defined(__linux__) && !defined(TARGET_S390X)
                    if (block->fd) {
#ifdef MAP_POPULATE
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                            MAP_PRIVATE;
#else
                        flags |= MAP_PRIVATE;
#endif
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, block->fd, offset);
                    } else {
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, -1, 0);
                    }
#else
                    abort();
#endif
                } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
                                flags, -1, 0);
#else
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
#endif
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            /* Move this entry to the start of the list.  */
            if (block != QLIST_FIRST(&ram_list.blocks)) {
                QLIST_REMOVE(block, next);
                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
            }
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
 */
void *qemu_safe_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument */
void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
{
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;

        QLIST_FOREACH(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->length) {
                if (addr - block->offset + *size > block->length)
                    *size = block->length - addr + block->offset;
                return block->host + (addr - block->offset);
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}

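/* Translate a host pointer back into a ram_addr_t by searching the list of
   RAM blocks (or asking the Xen map cache).  Returns 0 and stores the
   address in *ram_addr on success, -1 if the pointer does not belong to
   guest RAM. */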
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return 0;
    }

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            *ram_addr = block->offset + (host - block->host);
            return 0;
        }
    }

    return -1;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
        abort();
    }
    return ram_addr;
}

static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
                                    unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
#endif
    return 0;
}

static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
                                 uint64_t val, unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
#endif
}

static const MemoryRegionOps unassigned_mem_ops = {
    .read = unassigned_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
                               unsigned size)
{
    abort();
}

static void error_mem_write(void *opaque, target_phys_addr_t addr,
                            uint64_t value, unsigned size)
{
    abort();
}

static const MemoryRegionOps error_mem_ops = {
    .read = error_mem_read,
    .write = error_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static const MemoryRegionOps rom_mem_ops = {
    .read = error_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

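/* Write handler installed for RAM pages whose dirty bits are not all set.
   It invalidates any translated code on the page before the store, performs
   the store, marks the page dirty and, once all dirty bits are set, removes
   itself from the TLB entry so later writes go straight to RAM. */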
static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
                               uint64_t val, unsigned size)
{
    int dirty_flags;
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
        tb_invalidate_phys_page_fast(ram_addr, size);
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
#endif
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (dirty_flags == 0xff)
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
}

static const MemoryRegionOps notdirty_mem_ops = {
    .read = error_mem_read,
    .write = notdirty_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUArchState *env = cpu_single_env;
    target_ulong pc, cs_base;
    TranslationBlock *tb;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb = tb_find_pc(env->mem_io_pc);
                if (!tb) {
                    cpu_abort(env, "check_watchpoint: could not find TB for "
                              "pc=%p", (void *)env->mem_io_pc);
                }
                cpu_restore_state(tb, env, env->mem_io_pc);
                tb_phys_invalidate(tb, -1);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(addr);
    case 2: return lduw_phys(addr);
    case 4: return ldl_phys(addr);
    default: abort();
    }
}

static void watch_mem_write(void *opaque, target_phys_addr_t addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(addr, val);
        break;
    case 2:
        stw_phys(addr, val);
        break;
    case 4:
        stl_phys(addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
                             unsigned len)
{
    subpage_t *mmio = opaque;
    unsigned int idx = SUBPAGE_IDX(addr);
    MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
           mmio, len, addr, idx);
#endif

    section = &phys_sections[mmio->sub_section[idx]];
    addr += mmio->base;
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    return io_mem_read(section->mr, addr, len);
}

static void subpage_write(void *opaque, target_phys_addr_t addr,
                          uint64_t value, unsigned len)
{
    subpage_t *mmio = opaque;
    unsigned int idx = SUBPAGE_IDX(addr);
    MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
           " idx %d value %"PRIx64"\n",
           __func__, mmio, len, addr, idx, value);
#endif

    section = &phys_sections[mmio->sub_section[idx]];
    addr += mmio->base;
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    io_mem_write(section->mr, addr, value, len);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
                                 unsigned size)
{
    ram_addr_t raddr = addr;
    void *ptr = qemu_get_ram_ptr(raddr);
    switch (size) {
    case 1: return ldub_p(ptr);
    case 2: return lduw_p(ptr);
    case 4: return ldl_p(ptr);
    default: abort();
    }
}

static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
                              uint64_t value, unsigned size)
{
    ram_addr_t raddr = addr;
    void *ptr = qemu_get_ram_ptr(raddr);
    switch (size) {
    case 1: return stb_p(ptr, value);
    case 2: return stw_p(ptr, value);
    case 4: return stl_p(ptr, value);
    default: abort();
    }
}

static const MemoryRegionOps subpage_ram_ops = {
    .read = subpage_ram_read,
    .write = subpage_ram_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

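/* Point the sub_section table entries for offsets [start, end] within this
   subpage at the given section index.  RAM-backed sections are rerouted
   through io_mem_subpage_ram so that sub-page accesses still go through the
   section lookup. */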
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
           mmio, start, end, idx, eidx, memory);
#endif
    if (memory_region_is_ram(phys_sections[section].mr)) {
        MemoryRegionSection new_section = phys_sections[section];
        new_section.mr = &io_mem_subpage_ram;
        section = phys_section_add(&new_section);
    }
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(target_phys_addr_t base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->base = base;
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);

    return mmio;
}

static uint16_t dummy_section(MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = UINT64_MAX,
    };

    return phys_section_add(&section);
}

MemoryRegion *iotlb_to_region(target_phys_addr_t index)
{
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
                          "subpage-ram", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}

static void core_begin(MemoryListener *listener)
{
    destroy_all_mappings();
    phys_sections_clear();
    phys_map.ptr = PHYS_MAP_NODE_NIL;
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
    phys_section_rom = dummy_section(&io_mem_rom);
    phys_section_watch = dummy_section(&io_mem_watch);
}

static void core_commit(MemoryListener *listener)
{
    CPUArchState *env;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}

static void core_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    cpu_register_physical_memory_log(section, section->readonly);
}

static void core_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
{
}

static void core_region_nop(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    cpu_register_physical_memory_log(section, section->readonly);
}

static void core_log_start(MemoryListener *listener,
                           MemoryRegionSection *section)
{
}

static void core_log_stop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void core_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static void core_eventfd_add(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool match_data, uint64_t data, EventNotifier *e)
{
}

static void core_eventfd_del(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool match_data, uint64_t data, EventNotifier *e)
{
}

static void io_begin(MemoryListener *listener)
{
}

static void io_commit(MemoryListener *listener)
{
}

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

static void io_region_nop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void io_log_start(MemoryListener *listener,
                         MemoryRegionSection *section)
{
}

static void io_log_stop(MemoryListener *listener,
                        MemoryRegionSection *section)
{
}

static void io_log_sync(MemoryListener *listener,
                        MemoryRegionSection *section)
{
}

static void io_log_global_start(MemoryListener *listener)
{
}

static void io_log_global_stop(MemoryListener *listener)
{
}

static void io_eventfd_add(MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool match_data, uint64_t data, EventNotifier *e)
{
}

static void io_eventfd_del(MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool match_data, uint64_t data, EventNotifier *e)
{
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .region_add = core_region_add,
    .region_del = core_region_del,
    .region_nop = core_region_nop,
    .log_start = core_log_start,
    .log_stop = core_log_stop,
    .log_sync = core_log_sync,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .eventfd_add = core_eventfd_add,
    .eventfd_del = core_eventfd_del,
    .priority = 0,
};

static MemoryListener io_memory_listener = {
    .begin = io_begin,
    .commit = io_commit,
    .region_add = io_region_add,
    .region_del = io_region_del,
    .region_nop = io_region_nop,
    .log_start = io_log_start,
    .log_stop = io_log_stop,
    .log_sync = io_log_sync,
    .log_global_start = io_log_global_start,
    .log_global_stop = io_log_global_stop,
    .eventfd_add = io_eventfd_add,
    .eventfd_del = io_eventfd_del,
    .priority = 0,
};

static void memory_map_init(void)
3342
{
3343
    system_memory = g_malloc(sizeof(*system_memory));
3344
    memory_region_init(system_memory, "system", INT64_MAX);
3345
    set_system_memory_map(system_memory);
3346

    
3347
    system_io = g_malloc(sizeof(*system_io));
3348
    memory_region_init(system_io, "io", 65536);
3349
    set_system_io_map(system_io);
3350

    
3351
    memory_listener_register(&core_memory_listener, system_memory);
3352
    memory_listener_register(&io_memory_listener, system_io);
3353
}
3354

    
3355
MemoryRegion *get_system_memory(void)
3356
{
3357
    return system_memory;
3358
}
3359

    
3360
MemoryRegion *get_system_io(void)
3361
{
3362
    return system_io;
3363
}
3364

    
3365
#endif /* !defined(CONFIG_USER_ONLY) */
3366

    
3367
/* physical memory access (slow version, mainly for debug) */
3368
#if defined(CONFIG_USER_ONLY)
3369
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3370
                        uint8_t *buf, int len, int is_write)
3371
{
3372
    int l, flags;
3373
    target_ulong page;
3374
    void * p;
3375

    
3376
    while (len > 0) {
3377
        page = addr & TARGET_PAGE_MASK;
3378
        l = (page + TARGET_PAGE_SIZE) - addr;
3379
        if (l > len)
3380
            l = len;
3381
        flags = page_get_flags(page);
3382
        if (!(flags & PAGE_VALID))
3383
            return -1;
3384
        if (is_write) {
3385
            if (!(flags & PAGE_WRITE))
3386
                return -1;
3387
            /* XXX: this code should not depend on lock_user */
3388
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3389
                return -1;
3390
            memcpy(p, buf, l);
3391
            unlock_user(p, addr, l);
3392
        } else {
3393
            if (!(flags & PAGE_READ))
3394
                return -1;
3395
            /* XXX: this code should not depend on lock_user */
3396
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3397
                return -1;
3398
            memcpy(buf, p, l);
3399
            unlock_user(p, addr, 0);
3400
        }
3401
        len -= l;
3402
        buf += l;
3403
        addr += l;
3404
    }
3405
    return 0;
3406
}
3407

    
3408
#else
3409
void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3410
                            int len, int is_write)
3411
{
3412
    int l;
3413
    uint8_t *ptr;
3414
    uint32_t val;
3415
    target_phys_addr_t page;
3416
    MemoryRegionSection *section;
3417

    
3418
    while (len > 0) {
3419
        page = addr & TARGET_PAGE_MASK;
3420
        l = (page + TARGET_PAGE_SIZE) - addr;
3421
        if (l > len)
3422
            l = len;
3423
        section = phys_page_find(page >> TARGET_PAGE_BITS);
3424

    
3425
        if (is_write) {
3426
            if (!memory_region_is_ram(section->mr)) {
3427
                target_phys_addr_t addr1;
3428
                addr1 = memory_region_section_addr(section, addr);
3429
                /* XXX: could force cpu_single_env to NULL to avoid
3430
                   potential bugs */
3431
                if (l >= 4 && ((addr1 & 3) == 0)) {
3432
                    /* 32 bit write access */
3433
                    val = ldl_p(buf);
3434
                    io_mem_write(section->mr, addr1, val, 4);
3435
                    l = 4;
3436
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
3437
                    /* 16 bit write access */
3438
                    val = lduw_p(buf);
3439
                    io_mem_write(section->mr, addr1, val, 2);
3440
                    l = 2;
3441
                } else {
3442
                    /* 8 bit write access */
3443
                    val = ldub_p(buf);
3444
                    io_mem_write(section->mr, addr1, val, 1);
3445
                    l = 1;
3446
                }
3447
            } else if (!section->readonly) {
3448
                ram_addr_t addr1;
3449
                addr1 = memory_region_get_ram_addr(section->mr)
3450
                    + memory_region_section_addr(section, addr);
3451
                /* RAM case */
3452
                ptr = qemu_get_ram_ptr(addr1);
3453
                memcpy(ptr, buf, l);
3454
                if (!cpu_physical_memory_is_dirty(addr1)) {
3455
                    /* invalidate code */
3456
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3457
                    /* set dirty bit */
3458
                    cpu_physical_memory_set_dirty_flags(
3459
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
3460
                }
3461
                qemu_put_ram_ptr(ptr);
3462
            }
3463
        } else {
3464
            if (!(memory_region_is_ram(section->mr) ||
3465
                  memory_region_is_romd(section->mr))) {
3466
                target_phys_addr_t addr1;
3467
                /* I/O case */
3468
                addr1 = memory_region_section_addr(section, addr);
3469
                if (l >= 4 && ((addr1 & 3) == 0)) {
3470
                    /* 32 bit read access */
3471
                    val = io_mem_read(section->mr, addr1, 4);
3472
                    stl_p(buf, val);
3473
                    l = 4;
3474
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
3475
                    /* 16 bit read access */
3476
                    val = io_mem_read(section->mr, addr1, 2);
3477
                    stw_p(buf, val);
3478
                    l = 2;
3479
                } else {
3480
                    /* 8 bit read access */
3481
                    val = io_mem_read(section->mr, addr1, 1);
3482
                    stb_p(buf, val);
3483
                    l = 1;
3484
                }
3485
            } else {
3486
                /* RAM case */
3487
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
3488
                                       + memory_region_section_addr(section,
3489
                                                                    addr));
3490
                memcpy(buf, ptr, l);
3491
                qemu_put_ram_ptr(ptr);
3492
            }
3493
        }
3494
        len -= l;
3495
        buf += l;
3496
        addr += l;
3497
    }
3498
}
3499

    
3500
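/* Example (illustrative sketch only, not a call site in this file; the
 * descriptor address and layout below are hypothetical): device models
 * normally use the cpu_physical_memory_read()/cpu_physical_memory_write()
 * wrappers, which end up in cpu_physical_memory_rw() above:
 *
 *     uint8_t desc[16];
 *     target_phys_addr_t desc_pa = 0x10000;
 *
 *     cpu_physical_memory_read(desc_pa, desc, sizeof(desc));
 *     desc[0] |= 0x80;
 *     cpu_physical_memory_write(desc_pa, desc, sizeof(desc));
 */
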
/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(target_phys_addr_t addr,
                                   const uint8_t *buf, int len)
{
    int l;
    uint8_t *ptr;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr))) {
            /* do nothing */
        } else {
            unsigned long addr1;
            addr1 = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            qemu_put_ram_ptr(ptr);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

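/* Example (illustrative sketch; the blob symbol and load address are
 * hypothetical): ROM loading code uses cpu_physical_memory_write_rom() so
 * that the copy also reaches ROM-backed regions which the ordinary
 * cpu_physical_memory_rw() write path would skip as read-only:
 *
 *     cpu_physical_memory_write_rom(0xfffc0000, boot_blob, boot_blob_size);
 */
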
typedef struct {
    void *buffer;
    target_phys_addr_t addr;
    target_phys_addr_t len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *cpu_physical_memory_map(target_phys_addr_t addr,
                              target_phys_addr_t *plen,
                              int is_write)
{
    target_phys_addr_t len = *plen;
    target_phys_addr_t todo = 0;
    int l;
    target_phys_addr_t page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                cpu_physical_memory_read(addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                               int is_write, target_phys_addr_t access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}

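/* Example (illustrative sketch; transfer_to_host() and the retry callback
 * are hypothetical): the usual zero-copy pattern is map, use the host
 * pointer for up to *plen bytes, then unmap with the length actually
 * transferred.  A NULL return means resources (e.g. the single bounce
 * buffer) are busy; register a map client to be told when to retry:
 *
 *     target_phys_addr_t plen = size;
 *     void *host = cpu_physical_memory_map(guest_pa, &plen, is_write);
 *
 *     if (host) {
 *         transfer_to_host(host, plen);
 *         cpu_physical_memory_unmap(host, plen, is_write, plen);
 *     } else {
 *         cpu_register_map_client(opaque, retry_callback);
 *     }
 */
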
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(target_phys_addr_t addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
                               & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#endif

/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
            && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}

#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
            cross_page,
            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
{
    MemoryRegionSection *section;

    section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);

    return !(memory_region_is_ram(section->mr) ||
             memory_region_is_romd(section->mr));
}
#endif