/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "osdep.h"
#include "kvm.h"
#include "hw/xen.h"
#include "qemu-timer.h"
#include "memory.h"
#include "exec-memory.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/time.h>
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else /* !CONFIG_USER_ONLY */
#include "xen-mapcache.h"
#include "trace.h"
#endif

#include "cputlb.h"

#define WANT_EXEC_OBSOLETE
#include "exec-obsolete.h"

//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
//#define DEBUG_UNASSIGNED

/* make various TB consistency checks */
//#define DEBUG_TB_CHECK

//#define DEBUG_IOPORT
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation.  */
#undef DEBUG_TB_CHECK
#endif

#define SMC_BITMAP_USE_THRESHOLD 10

static TranslationBlock *tbs;
static int code_gen_max_blocks;
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;

#if defined(__arm__) || defined(__sparc_v9__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
 have limited branch ranges (possibly also PPC) so place it in a
 section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32) && !defined(_WIN64)
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif

uint8_t code_gen_prologue[1024] code_gen_section;
static uint8_t *code_gen_buffer;
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
static uint8_t *code_gen_ptr;

#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
static MemoryRegion io_mem_subpage_ram;

#endif

CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount = 0;

typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of write accesses to a given page; above a threshold a code
       bitmap is built */
    unsigned int code_write_count;
    uint8_t *code_bitmap;
#if defined(CONFIG_USER_ONLY)
    unsigned long flags;
#endif
} PageDesc;

/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses.  */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)

#define P_L2_LEVELS \
    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)

/* The bits remaining after N lower levels of page tables.  */
#define V_L1_BITS_REM \
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)

#if V_L1_BITS_REM < 4
#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS  V_L1_BITS_REM
#endif

#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)

#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)

    
179
uintptr_t qemu_real_host_page_size;
180
uintptr_t qemu_host_page_size;
181
uintptr_t qemu_host_page_mask;
182

    
183
/* This is a multi-level map on the virtual address space.
184
   The bottom level has pointers to PageDesc.  */
185
static void *l1_map[V_L1_SIZE];
186

    
187
#if !defined(CONFIG_USER_ONLY)
188
typedef struct PhysPageEntry PhysPageEntry;
189

    
190
static MemoryRegionSection *phys_sections;
191
static unsigned phys_sections_nb, phys_sections_nb_alloc;
192
static uint16_t phys_section_unassigned;
193
static uint16_t phys_section_notdirty;
194
static uint16_t phys_section_rom;
195
static uint16_t phys_section_watch;
196

    
197
struct PhysPageEntry {
198
    uint16_t is_leaf : 1;
199
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
200
    uint16_t ptr : 15;
201
};
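
/* Note: with only 15 bits in 'ptr', at most 32767 intermediate nodes or
   sections can be referenced (the value PHYS_MAP_NODE_NIL, defined below,
   is reserved as the nil marker); phys_map_node_alloc() asserts that this
   encoding is never exhausted. */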

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

/* This is a multi-level map on the physical address space.
   The bottom level has pointers to MemoryRegionSections.  */
static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };

static void io_mem_init(void);
static void memory_map_init(void);

static MemoryRegion io_mem_watch;
#endif

/* statistics */
static int tb_flush_count;
static int tb_phys_invalidate_count;

#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD old_protect;
    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);

}
#else
static void map_exec(void *addr, long size)
{
    unsigned long start, end, page_size;

    page_size = getpagesize();
    start = (unsigned long)addr;
    start &= ~(page_size - 1);

    end = (unsigned long)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    mprotect((void *)start, end - start,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif

static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}

static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

#if defined(CONFIG_USER_ONLY)
    /* We can't use g_malloc because it may recurse into a locked mutex. */
# define ALLOC(P, SIZE)                                 \
    do {                                                \
        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
    } while (0)
#else
# define ALLOC(P, SIZE) \
    do { P = g_malloc0(SIZE); } while (0)
#endif

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));

    /* Level 2..N-1.  */
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        void **p = *lp;

        if (p == NULL) {
            if (!alloc) {
                return NULL;
            }
            ALLOC(p, sizeof(void *) * L2_SIZE);
            *lp = p;
        }

        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }

    pd = *lp;
    if (pd == NULL) {
        if (!alloc) {
            return NULL;
        }
        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
        *lp = pd;
    }

#undef ALLOC

    return pd + (index & (L2_SIZE - 1));
}

static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}
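
/* Typical usage, as in the TB handling code below: look up the descriptor
   for a page index without allocating, e.g.
       PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);
   and bail out if it returns NULL.  The allocating variant
   page_find_alloc(..., 1) is used when a TB is linked into a page
   (see tb_alloc_page() below). */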

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(unsigned nodes)
{
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
        typedef PhysPageEntry Node[L2_SIZE];
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
                                      phys_map_nodes_nb + nodes);
        phys_map_nodes = g_renew(Node, phys_map_nodes,
                                 phys_map_nodes_nb_alloc);
    }
}

static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = phys_map_nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != phys_map_nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        phys_map_nodes[ret][i].is_leaf = 0;
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}


static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
                                target_phys_addr_t *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

MemoryRegionSection *phys_page_find(target_phys_addr_t index)
{
    PhysPageEntry lp = phys_map;
    PhysPageEntry *p;
    int i;
    uint16_t s_index = phys_section_unassigned;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            goto not_found;
        }
        p = phys_map_nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }

    s_index = lp.ptr;
not_found:
    return &phys_sections[s_index];
}
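
/* Illustrative note: phys_page_set() above registers 'nb' consecutive pages
   starting at page 'index' so that phys_page_find() resolves any page in
   that range to &phys_sections[leaf]; pages that were never registered fall
   through to the phys_section_unassigned entry instead. */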

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_ram && mr != &io_mem_rom
        && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

#define mmap_lock() do { } while(0)
#define mmap_unlock() do { } while(0)
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc is used */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif

static void code_gen_alloc(unsigned long tb_size)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        start = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Keep the buffer no bigger than 16MB to branch between blocks */
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        start = (void *)0x90000000UL;
#endif
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        addr = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}
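
/* Note on the sizing above: code_gen_buffer_max_size is the flush threshold
   checked in tb_alloc(); keeping TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes of
   slack below the real end of the buffer is presumably meant to leave room
   for one maximally sized block started just under the threshold. */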

/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    code_gen_alloc(tb_size);
    code_gen_ptr = code_gen_buffer;
    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now.  */
    tcg_prologue_init(&tcg_ctx);
#endif
}

bool tcg_enabled(void)
{
    return code_gen_buffer != NULL;
}

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}

#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}

static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
#endif

CPUArchState *qemu_get_cpu(int cpu)
{
    CPUArchState *env = first_cpu;

    while (env) {
        if (env->cpu_index == cpu)
            break;
        env = env->next_cpu;
    }

    return env;
}

void cpu_exec_init(CPUArchState *env)
{
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    env->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}

/* Allocate a new translation block. Flush the translation buffer if
   too many translation blocks or too much generated code. */
static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    if (nb_tbs >= code_gen_max_blocks ||
        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
        return NULL;
    tb = &tbs[nb_tbs++];
    tb->pc = pc;
    tb->cflags = 0;
    return tb;
}

void tb_free(TranslationBlock *tb)
{
    /* In practice this is mostly used for single-use temporary TBs.
       Ignore the hard cases and just back up if this TB happens to
       be the last one generated.  */
    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
        code_gen_ptr = tb->tc_ptr;
        nb_tbs--;
    }
}

static inline void invalidate_page_bitmap(PageDesc *p)
{
    if (p->code_bitmap) {
        g_free(p->code_bitmap);
        p->code_bitmap = NULL;
    }
    p->code_write_count = 0;
}

/* Set to NULL all the 'first_tb' fields in all PageDescs. */

static void page_flush_tb_1 (int level, void **lp)
{
    int i;

    if (*lp == NULL) {
        return;
    }
    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pd[i].first_tb = NULL;
            invalidate_page_bitmap(pd + i);
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            page_flush_tb_1 (level - 1, pp + i);
        }
    }
}

static void page_flush_tb(void)
{
    int i;
    for (i = 0; i < V_L1_SIZE; i++) {
        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
    }
}

/* flush all the translation blocks */
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUArchState *env1)
{
    CPUArchState *env;
#if defined(DEBUG_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(code_gen_ptr - code_gen_buffer),
           nb_tbs, nb_tbs > 0 ?
           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
#endif
    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
        cpu_abort(env1, "Internal error: code buffer overflow\n");

    nb_tbs = 0;

    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
    }

    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
    page_flush_tb();

    code_gen_ptr = code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    tb_flush_count++;
}

#ifdef DEBUG_TB_CHECK

static void tb_invalidate_check(target_ulong address)
{
    TranslationBlock *tb;
    int i;
    address &= TARGET_PAGE_MASK;
    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                  address >= tb->pc + tb->size)) {
                printf("ERROR invalidate: address=" TARGET_FMT_lx
                       " PC=%08lx size=%04x\n",
                       address, (long)tb->pc, tb->size);
            }
        }
    }
}

/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
    TranslationBlock *tb;
    int i, flags1, flags2;

    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            flags1 = page_get_flags(tb->pc);
            flags2 = page_get_flags(tb->pc + tb->size - 1);
            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
                       (long)tb->pc, tb->size, flags1, flags2);
            }
        }
    }
}

#endif

/* invalidate one TB */
static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
                             int next_offset)
{
    TranslationBlock *tb1;
    for(;;) {
        tb1 = *ptb;
        if (tb1 == tb) {
            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
            break;
        }
        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
    }
}

static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    unsigned int n1;

    for(;;) {
        tb1 = *ptb;
        n1 = (uintptr_t)tb1 & 3;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        if (tb1 == tb) {
            *ptb = tb1->page_next[n1];
            break;
        }
        ptb = &tb1->page_next[n1];
    }
}

static inline void tb_jmp_remove(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, **ptb;
    unsigned int n1;

    ptb = &tb->jmp_next[n];
    tb1 = *ptb;
    if (tb1) {
        /* find tb(n) in circular list */
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            if (n1 == 2) {
                ptb = &tb1->jmp_first;
            } else {
                ptb = &tb1->jmp_next[n1];
            }
        }
        /* now we can suppress tb(n) from the list */
        *ptb = tb->jmp_next[n];

        tb->jmp_next[n] = NULL;
    }
}
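
/* Note on the lists walked above: the low two bits of the pointers stored in
   page_next[], jmp_next[] and jmp_first encode a tag.  Values 0 and 1 select
   which of the owning TB's two page/jump slots the link refers to, and 2
   marks the end of a circular list (tb_link_page() sets jmp_first to the TB
   itself with tag 2), which is why every walker masks with ~3 before
   dereferencing. */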

/* reset the jump entry 'n' of a TB so that it is not chained to
   another TB */
static inline void tb_reset_jump(TranslationBlock *tb, int n)
{
    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
}

void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUArchState *env;
    PageDesc *p;
    unsigned int h, n1;
    tb_page_addr_t phys_pc;
    TranslationBlock *tb1, *tb2;

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_remove(&tb_phys_hash[h], tb,
              offsetof(TranslationBlock, phys_hash_next));

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;

    /* remove the TB from the per-CPU jump caches */
    h = tb_jmp_cache_hash_func(tb->pc);
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->tb_jmp_cache[h] == tb)
            env->tb_jmp_cache[h] = NULL;
    }

    /* suppress this TB from the two jump lists */
    tb_jmp_remove(tb, 0);
    tb_jmp_remove(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb1 = tb->jmp_first;
    for(;;) {
        n1 = (uintptr_t)tb1 & 3;
        if (n1 == 2)
            break;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        tb2 = tb1->jmp_next[n1];
        tb_reset_jump(tb1, n1);
        tb1->jmp_next[n1] = NULL;
        tb1 = tb2;
    }
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
}

static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
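
/* Example: set_bits(tab, 3, 7) marks bits 3..9, i.e. it ORs 0xf8 into tab[0]
   and 0x03 into tab[1]. */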

static void build_page_bitmap(PageDesc *p)
{
    int n, tb_start, tb_end;
    TranslationBlock *tb;

    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);

    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->pc & ~TARGET_PAGE_MASK;
            tb_end = tb_start + tb->size;
            if (tb_end > TARGET_PAGE_SIZE)
                tb_end = TARGET_PAGE_SIZE;
        } else {
            tb_start = 0;
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
        tb = tb->page_next[n];
    }
}
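
/* The bitmap built above uses one bit per byte of the target page
   (TARGET_PAGE_SIZE / 8 bytes in total); a set bit means the byte is covered
   by at least one TB, and tb_invalidate_phys_page_fast() consults it to skip
   writes that cannot touch translated code. */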

TranslationBlock *tb_gen_code(CPUArchState *env,
                              target_ulong pc, target_ulong cs_base,
                              int flags, int cflags)
{
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    int code_gen_size;

    phys_pc = get_page_addr_code(env, pc);
    tb = tb_alloc(pc);
    if (!tb) {
        /* flush must be done */
        tb_flush(env);
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        /* Don't forget to invalidate previous TB info.  */
        tb_invalidated_flag = 1;
    }
    tc_ptr = code_gen_ptr;
    tb->tc_ptr = tc_ptr;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    cpu_gen_code(env, tb, &code_gen_size);
    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    tb_link_page(tb, phys_pc, phys_page2);
    return tb;
}

/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end may refer to *different* physical pages.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
                              int is_cpu_write_access)
{
    while (start < end) {
        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
        start &= TARGET_PAGE_MASK;
        start += TARGET_PAGE_SIZE;
    }
}

/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end must refer to the *same* physical page.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
{
    TranslationBlock *tb, *tb_next, *saved_tb;
    CPUArchState *env = cpu_single_env;
    tb_page_addr_t tb_start, tb_end;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    int current_tb_not_found = is_cpu_write_access;
    TranslationBlock *current_tb = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
        is_cpu_write_access) {
        /* build code bitmap */
        build_page_bitmap(p);
    }

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all the code */
    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        tb_next = tb->page_next[n];
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = 0;
                current_tb = NULL;
                if (env->mem_io_pc) {
                    /* now we have a real cpu fault */
                    current_tb = tb_find_pc(env->mem_io_pc);
                }
            }
            if (current_tb == tb &&
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                its execution. We could be more precise by checking
                that the modification is after the current PC, but it
                would require a specialized function to partially
                restore the CPU state */

                current_tb_modified = 1;
                cpu_restore_state(current_tb, env, env->mem_io_pc);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            /* we need to do that to handle the case where a signal
               occurs while doing tb_phys_invalidate() */
            saved_tb = NULL;
            if (env) {
                saved_tb = env->current_tb;
                env->current_tb = NULL;
            }
            tb_phys_invalidate(tb, -1);
            if (env) {
                env->current_tb = saved_tb;
                if (env->interrupt_request && env->current_tb)
                    cpu_interrupt(env, env->interrupt_request);
            }
        }
        tb = tb_next;
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        if (is_cpu_write_access) {
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
        }
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, NULL);
    }
#endif
}

/* len must be <= 8 and start must be a multiple of len */
static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
    PageDesc *p;
    int offset, b;
#if 0
    if (1) {
        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
                  cpu_single_env->mem_io_vaddr, len,
                  cpu_single_env->eip,
                  cpu_single_env->eip +
                  (intptr_t)cpu_single_env->segs[R_CS].base);
    }
#endif
    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (p->code_bitmap) {
        offset = start & ~TARGET_PAGE_MASK;
        b = p->code_bitmap[offset >> 3] >> (offset & 7);
        if (b & ((1 << len) - 1))
            goto do_invalidate;
    } else {
    do_invalidate:
        tb_invalidate_phys_page_range(start, start + len, 1);
    }
}
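
/* Illustrative example of the fast path above: for a 4-byte write at page
   offset 0x104, offset = 0x104, so b = code_bitmap[32] >> 4 and the test
   (b & 0xf) checks exactly the bits for bytes 0x104..0x107; if none of them
   is set, no TB covers the written bytes and the expensive range
   invalidation is skipped. */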

#if !defined(CONFIG_SOFTMMU)
static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                    uintptr_t pc, void *puc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUArchState *env = cpu_single_env;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p)
        return;
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
        current_tb = tb_find_pc(pc);
    }
#endif
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                   its execution. We could be more precise by checking
                   that the modification is after the current PC, but it
                   would require a specialized function to partially
                   restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state(current_tb, env, pc);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        tb_phys_invalidate(tb, addr);
        tb = tb->page_next[n];
    }
    p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, puc);
    }
#endif
}
#endif

/* add the tb in the target page and protect it if necessary */
static inline void tb_alloc_page(TranslationBlock *tb,
                                 unsigned int n, tb_page_addr_t page_addr)
{
    PageDesc *p;
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    page_already_protected = p->first_tb != NULL;
#endif
    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
    invalidate_page_bitmap(p);

#if defined(TARGET_HAS_SMC) || 1

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find (addr >> TARGET_PAGE_BITS);
            if (!p2)
                continue;
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
#ifdef DEBUG_TB_INVALIDATE
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
               page_addr);
#endif
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif

#endif /* TARGET_HAS_SMC */
}

/* add a new TB and link it to the physical page tables. phys_page2 is
   (-1) to indicate that only one page contains the TB. */
void tb_link_page(TranslationBlock *tb,
                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
{
    unsigned int h;
    TranslationBlock **ptb;

    /* Grab the mmap lock to stop another thread invalidating this TB
       before we are done.  */
    mmap_lock();
    /* add in the physical hash table */
    h = tb_phys_hash_func(phys_pc);
    ptb = &tb_phys_hash[h];
    tb->phys_hash_next = *ptb;
    *ptb = tb;

    /* add in the page list */
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (phys_page2 != -1)
        tb_alloc_page(tb, 1, phys_page2);
    else
        tb->page_addr[1] = -1;

    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
    tb->jmp_next[0] = NULL;
    tb->jmp_next[1] = NULL;

    /* init original jump addresses */
    if (tb->tb_next_offset[0] != 0xffff)
        tb_reset_jump(tb, 0);
    if (tb->tb_next_offset[1] != 0xffff)
        tb_reset_jump(tb, 1);

#ifdef DEBUG_TB_CHECK
    tb_page_check();
#endif
    mmap_unlock();
}

/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
   tb[1].tc_ptr. Return NULL if not found */
TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
{
    int m_min, m_max, m;
    uintptr_t v;
    TranslationBlock *tb;

    if (nb_tbs <= 0)
        return NULL;
    if (tc_ptr < (uintptr_t)code_gen_buffer ||
        tc_ptr >= (uintptr_t)code_gen_ptr) {
        return NULL;
    }
    /* binary search (cf Knuth) */
    m_min = 0;
    m_max = nb_tbs - 1;
    while (m_min <= m_max) {
        m = (m_min + m_max) >> 1;
        tb = &tbs[m];
        v = (uintptr_t)tb->tc_ptr;
        if (v == tc_ptr)
            return tb;
        else if (tc_ptr < v) {
            m_max = m - 1;
        } else {
            m_min = m + 1;
        }
    }
    return &tbs[m_max];
}

static void tb_reset_jump_recursive(TranslationBlock *tb);

static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, *tb_next, **ptb;
    unsigned int n1;

    tb1 = tb->jmp_next[n];
    if (tb1 != NULL) {
        /* find head of list */
        for(;;) {
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == 2)
                break;
            tb1 = tb1->jmp_next[n1];
        }
        /* we are now sure that tb jumps to tb1 */
        tb_next = tb1;

        /* remove tb from the jmp_first list */
        ptb = &tb_next->jmp_first;
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            ptb = &tb1->jmp_next[n1];
        }
        *ptb = tb->jmp_next[n];
        tb->jmp_next[n] = NULL;

        /* suppress the jump to next tb in generated code */
        tb_reset_jump(tb, n);

        /* suppress jumps in the tb on which we could have jumped */
        tb_reset_jump_recursive(tb_next);
    }
}

static void tb_reset_jump_recursive(TranslationBlock *tb)
{
    tb_reset_jump_recursive2(tb, 0);
    tb_reset_jump_recursive2(tb, 1);
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
void tb_invalidate_phys_addr(target_phys_addr_t addr)
{
    ram_addr_t ram_addr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!(memory_region_is_ram(section->mr)
          || (section->mr->rom_device && section->mr->readable))) {
        return;
    }
    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        + memory_region_section_addr(section, addr);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}

static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}
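
/* Example of the sanity check above: watchpoint lengths must be a power of
   two and the address aligned to that length, so len = 4 at addr 0x1000
   gives len_mask = ~3 and is accepted, while the same length at 0x1002
   fails the (addr & ~len_mask) test and returns -EINVAL. */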
1537

    
1538
/* Remove a specific watchpoint.  */
1539
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1540
                          int flags)
1541
{
1542
    target_ulong len_mask = ~(len - 1);
1543
    CPUWatchpoint *wp;
1544

    
1545
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1546
        if (addr == wp->vaddr && len_mask == wp->len_mask
1547
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1548
            cpu_watchpoint_remove_by_ref(env, wp);
1549
            return 0;
1550
        }
1551
    }
1552
    return -ENOENT;
1553
}
1554

    
1555
/* Remove a specific watchpoint by reference.  */
1556
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1557
{
1558
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1559

    
1560
    tlb_flush_page(env, watchpoint->vaddr);
1561

    
1562
    g_free(watchpoint);
1563
}
1564

    
1565
/* Remove all matching watchpoints.  */
1566
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1567
{
1568
    CPUWatchpoint *wp, *next;
1569

    
1570
    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1571
        if (wp->flags & mask)
1572
            cpu_watchpoint_remove_by_ref(env, wp);
1573
    }
1574
}
1575
#endif
1576

    
1577
/* Add a breakpoint.  */
1578
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1579
                          CPUBreakpoint **breakpoint)
1580
{
1581
#if defined(TARGET_HAS_ICE)
1582
    CPUBreakpoint *bp;
1583

    
1584
    bp = g_malloc(sizeof(*bp));
1585

    
1586
    bp->pc = pc;
1587
    bp->flags = flags;
1588

    
1589
    /* keep all GDB-injected breakpoints in front */
1590
    if (flags & BP_GDB)
1591
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1592
    else
1593
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1594

    
1595
    breakpoint_invalidate(env, pc);
1596

    
1597
    if (breakpoint)
1598
        *breakpoint = bp;
1599
    return 0;
1600
#else
1601
    return -ENOSYS;
1602
#endif
1603
}
1604

    
1605
/* Remove a specific breakpoint.  */
1606
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1607
{
1608
#if defined(TARGET_HAS_ICE)
1609
    CPUBreakpoint *bp;
1610

    
1611
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1612
        if (bp->pc == pc && bp->flags == flags) {
1613
            cpu_breakpoint_remove_by_ref(env, bp);
1614
            return 0;
1615
        }
1616
    }
1617
    return -ENOENT;
1618
#else
1619
    return -ENOSYS;
1620
#endif
1621
}
1622

    
1623
/* Remove a specific breakpoint by reference.  */
1624
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1625
{
1626
#if defined(TARGET_HAS_ICE)
1627
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1628

    
1629
    breakpoint_invalidate(env, breakpoint->pc);
1630

    
1631
    g_free(breakpoint);
1632
#endif
1633
}
1634

    
1635
/* Remove all matching breakpoints. */
1636
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1637
{
1638
#if defined(TARGET_HAS_ICE)
1639
    CPUBreakpoint *bp, *next;
1640

    
1641
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1642
        if (bp->flags & mask)
1643
            cpu_breakpoint_remove_by_ref(env, bp);
1644
    }
1645
#endif
1646
}
1647

    
1648
/* enable or disable single step mode. EXCP_DEBUG is returned by the
1649
   CPU loop after each instruction */
1650
void cpu_single_step(CPUArchState *env, int enabled)
1651
{
1652
#if defined(TARGET_HAS_ICE)
1653
    if (env->singlestep_enabled != enabled) {
1654
        env->singlestep_enabled = enabled;
1655
        if (kvm_enabled())
1656
            kvm_update_guest_debug(env, 0);
1657
        else {
1658
            /* must flush all the translated code to avoid inconsistencies */
1659
            /* XXX: only flush what is necessary */
1660
            tb_flush(env);
1661
        }
1662
    }
1663
#endif
1664
}
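
/*
 * Illustrative sketch (not part of the build): a caller enabling single-step
 * mode and watching for EXCP_DEBUG from the CPU loop.  The helper name is
 * hypothetical and the loop around cpu_exec() is heavily simplified.
 */
#if 0
static void example_single_step(CPUArchState *env)
{
    int trapnr;

    cpu_single_step(env, 1);   /* non-zero enables single stepping */
    trapnr = cpu_exec(env);    /* returns EXCP_DEBUG after one instruction */
    if (trapnr == EXCP_DEBUG) {
        /* inspect or report CPU state here */
    }
    cpu_single_step(env, 0);
}
#endif
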
static void cpu_unlink_tb(CPUArchState *env)
1667
{
1668
    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
1669
       problem and hope the cpu will stop of its own accord.  For userspace
1670
       emulation this often isn't actually as bad as it sounds.  Often
1671
       signals are used primarily to interrupt blocking syscalls.  */
1672
    TranslationBlock *tb;
1673
    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1674

    
1675
    spin_lock(&interrupt_lock);
1676
    tb = env->current_tb;
1677
    /* if the cpu is currently executing code, we must unlink it and
1678
       all the potentially executing TB */
1679
    if (tb) {
1680
        env->current_tb = NULL;
1681
        tb_reset_jump_recursive(tb);
1682
    }
1683
    spin_unlock(&interrupt_lock);
1684
}
1685

    
1686
#ifndef CONFIG_USER_ONLY
1687
/* mask must never be zero, except for A20 change call */
1688
static void tcg_handle_interrupt(CPUArchState *env, int mask)
1689
{
1690
    int old_mask;
1691

    
1692
    old_mask = env->interrupt_request;
1693
    env->interrupt_request |= mask;
1694

    
1695
    /*
     * If called from iothread context, wake the target cpu in
     * case it's halted.
     */
1699
    if (!qemu_cpu_is_self(env)) {
1700
        qemu_cpu_kick(env);
1701
        return;
1702
    }
1703

    
1704
    if (use_icount) {
1705
        env->icount_decr.u16.high = 0xffff;
1706
        if (!can_do_io(env)
1707
            && (mask & ~old_mask) != 0) {
1708
            cpu_abort(env, "Raised interrupt while not in I/O function");
1709
        }
1710
    } else {
1711
        cpu_unlink_tb(env);
1712
    }
1713
}
1714

    
1715
CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1716

    
1717
#else /* CONFIG_USER_ONLY */
1718

    
1719
void cpu_interrupt(CPUArchState *env, int mask)
1720
{
1721
    env->interrupt_request |= mask;
1722
    cpu_unlink_tb(env);
1723
}
1724
#endif /* CONFIG_USER_ONLY */
1725

    
1726
void cpu_reset_interrupt(CPUArchState *env, int mask)
1727
{
1728
    env->interrupt_request &= ~mask;
1729
}
1730

    
1731
void cpu_exit(CPUArchState *env)
1732
{
1733
    env->exit_request = 1;
1734
    cpu_unlink_tb(env);
1735
}
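
/*
 * Illustrative sketch (not part of the build): how board/device code
 * typically raises and clears a CPU interrupt with the helpers above.
 * The function name is hypothetical; CPU_INTERRUPT_HARD is the usual
 * external-interrupt bit.
 */
#if 0
static void example_set_irq_level(CPUArchState *env, int level)
{
    if (level) {
        cpu_interrupt(env, CPU_INTERRUPT_HARD);
    } else {
        cpu_reset_interrupt(env, CPU_INTERRUPT_HARD);
    }
}
#endif
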
void cpu_abort(CPUArchState *env, const char *fmt, ...)
1738
{
1739
    va_list ap;
1740
    va_list ap2;
1741

    
1742
    va_start(ap, fmt);
1743
    va_copy(ap2, ap);
1744
    fprintf(stderr, "qemu: fatal: ");
1745
    vfprintf(stderr, fmt, ap);
1746
    fprintf(stderr, "\n");
1747
#ifdef TARGET_I386
1748
    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1749
#else
1750
    cpu_dump_state(env, stderr, fprintf, 0);
1751
#endif
1752
    if (qemu_log_enabled()) {
1753
        qemu_log("qemu: fatal: ");
1754
        qemu_log_vprintf(fmt, ap2);
1755
        qemu_log("\n");
1756
#ifdef TARGET_I386
1757
        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1758
#else
1759
        log_cpu_state(env, 0);
1760
#endif
1761
        qemu_log_flush();
1762
        qemu_log_close();
1763
    }
1764
    va_end(ap2);
1765
    va_end(ap);
1766
#if defined(CONFIG_USER_ONLY)
1767
    {
1768
        struct sigaction act;
1769
        sigfillset(&act.sa_mask);
1770
        act.sa_handler = SIG_DFL;
1771
        sigaction(SIGABRT, &act, NULL);
1772
    }
1773
#endif
1774
    abort();
1775
}
1776

    
1777
CPUArchState *cpu_copy(CPUArchState *env)
1778
{
1779
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
1780
    CPUArchState *next_cpu = new_env->next_cpu;
1781
    int cpu_index = new_env->cpu_index;
1782
#if defined(TARGET_HAS_ICE)
1783
    CPUBreakpoint *bp;
1784
    CPUWatchpoint *wp;
1785
#endif
1786

    
1787
    memcpy(new_env, env, sizeof(CPUArchState));
1788

    
1789
    /* Preserve chaining and index. */
1790
    new_env->next_cpu = next_cpu;
1791
    new_env->cpu_index = cpu_index;
1792

    
1793
    /* Clone all break/watchpoints.
1794
       Note: Once we support ptrace with hw-debug register access, make sure
1795
       BP_CPU break/watchpoints are handled correctly on clone. */
1796
    QTAILQ_INIT(&env->breakpoints);
1797
    QTAILQ_INIT(&env->watchpoints);
1798
#if defined(TARGET_HAS_ICE)
1799
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1800
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1801
    }
1802
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1803
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1804
                              wp->flags, NULL);
1805
    }
1806
#endif
1807

    
1808
    return new_env;
1809
}
1810

    
1811
#if !defined(CONFIG_USER_ONLY)
1812
void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1813
{
1814
    unsigned int i;
1815

    
1816
    /* Discard jump cache entries for any tb which might potentially
1817
       overlap the flushed page.  */
1818
    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1819
    memset (&env->tb_jmp_cache[i], 0, 
1820
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1821

    
1822
    i = tb_jmp_cache_hash_page(addr);
1823
    memset (&env->tb_jmp_cache[i], 0, 
1824
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1825
}
1826

    
1827
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1828
                                      uintptr_t length)
1829
{
1830
    uintptr_t start1;
1831

    
1832
    /* we modify the TLB cache so that the dirty bit will be set again
1833
       when accessing the range */
1834
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1835
    /* Check that we don't span multiple blocks - this breaks the
1836
       address comparisons below.  */
1837
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1838
            != (end - 1) - start) {
1839
        abort();
1840
    }
1841
    cpu_tlb_reset_dirty_all(start1, length);
1842

    
1843
}
1844

    
1845
/* Note: start and end must be within the same ram block.  */
1846
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1847
                                     int dirty_flags)
1848
{
1849
    uintptr_t length;
1850

    
1851
    start &= TARGET_PAGE_MASK;
1852
    end = TARGET_PAGE_ALIGN(end);
1853

    
1854
    length = end - start;
1855
    if (length == 0)
1856
        return;
1857
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1858

    
1859
    if (tcg_enabled()) {
1860
        tlb_reset_dirty_range_all(start, end, length);
1861
    }
1862
}
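
/*
 * Illustrative sketch (not part of the build): a display adapter clearing the
 * dirty bits of its framebuffer after a redraw, along the lines of the VGA
 * code.  The helper name and framebuffer bounds are hypothetical, and
 * VGA_DIRTY_FLAG is assumed to be the dirty bit used by display code.
 * Note that start and end must stay within one RAM block.
 */
#if 0
static void example_clear_fb_dirty(ram_addr_t fb_offset, ram_addr_t fb_size)
{
    cpu_physical_memory_reset_dirty(fb_offset, fb_offset + fb_size,
                                    VGA_DIRTY_FLAG);
}
#endif
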
int cpu_physical_memory_set_dirty_tracking(int enable)
1865
{
1866
    int ret = 0;
1867
    in_migration = enable;
1868
    return ret;
1869
}
1870

    
1871
target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1872
                                                   MemoryRegionSection *section,
1873
                                                   target_ulong vaddr,
1874
                                                   target_phys_addr_t paddr,
1875
                                                   int prot,
1876
                                                   target_ulong *address)
1877
{
1878
    target_phys_addr_t iotlb;
1879
    CPUWatchpoint *wp;
1880

    
1881
    if (memory_region_is_ram(section->mr)) {
1882
        /* Normal RAM.  */
1883
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1884
            + memory_region_section_addr(section, paddr);
1885
        if (!section->readonly) {
1886
            iotlb |= phys_section_notdirty;
1887
        } else {
1888
            iotlb |= phys_section_rom;
1889
        }
1890
    } else {
1891
        /* IO handlers are currently passed a physical address.
1892
           It would be nice to pass an offset from the base address
1893
           of that region.  This would avoid having to special case RAM,
1894
           and avoid full address decoding in every device.
1895
           We can't use the high bits of pd for this because
1896
           IO_MEM_ROMD uses these as a ram address.  */
1897
        iotlb = section - phys_sections;
1898
        iotlb += memory_region_section_addr(section, paddr);
1899
    }
1900

    
1901
    /* Make accesses to pages with watchpoints go via the
1902
       watchpoint trap routines.  */
1903
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1904
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1905
            /* Avoid trapping reads of pages with a write breakpoint. */
1906
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1907
                iotlb = phys_section_watch + paddr;
1908
                *address |= TLB_MMIO;
1909
                break;
1910
            }
1911
        }
1912
    }
1913

    
1914
    return iotlb;
1915
}
1916

    
1917
#else
1918
/*
1919
 * Walks guest process memory "regions" one by one
1920
 * and calls callback function 'fn' for each region.
1921
 */
1922

    
1923
struct walk_memory_regions_data
1924
{
1925
    walk_memory_regions_fn fn;
1926
    void *priv;
1927
    uintptr_t start;
1928
    int prot;
1929
};
1930

    
1931
static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1932
                                   abi_ulong end, int new_prot)
1933
{
1934
    if (data->start != -1ul) {
1935
        int rc = data->fn(data->priv, data->start, end, data->prot);
1936
        if (rc != 0) {
1937
            return rc;
1938
        }
1939
    }
1940

    
1941
    data->start = (new_prot ? end : -1ul);
1942
    data->prot = new_prot;
1943

    
1944
    return 0;
1945
}
1946

    
1947
static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1948
                                 abi_ulong base, int level, void **lp)
1949
{
1950
    abi_ulong pa;
1951
    int i, rc;
1952

    
1953
    if (*lp == NULL) {
1954
        return walk_memory_regions_end(data, base, 0);
1955
    }
1956

    
1957
    if (level == 0) {
1958
        PageDesc *pd = *lp;
1959
        for (i = 0; i < L2_SIZE; ++i) {
1960
            int prot = pd[i].flags;
1961

    
1962
            pa = base | (i << TARGET_PAGE_BITS);
1963
            if (prot != data->prot) {
1964
                rc = walk_memory_regions_end(data, pa, prot);
1965
                if (rc != 0) {
1966
                    return rc;
1967
                }
1968
            }
1969
        }
1970
    } else {
1971
        void **pp = *lp;
1972
        for (i = 0; i < L2_SIZE; ++i) {
1973
            pa = base | ((abi_ulong)i <<
1974
                (TARGET_PAGE_BITS + L2_BITS * level));
1975
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1976
            if (rc != 0) {
1977
                return rc;
1978
            }
1979
        }
1980
    }
1981

    
1982
    return 0;
1983
}
1984

    
1985
int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1986
{
1987
    struct walk_memory_regions_data data;
1988
    uintptr_t i;
1989

    
1990
    data.fn = fn;
1991
    data.priv = priv;
1992
    data.start = -1ul;
1993
    data.prot = 0;
1994

    
1995
    for (i = 0; i < V_L1_SIZE; i++) {
1996
        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1997
                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1998
        if (rc != 0) {
1999
            return rc;
2000
        }
2001
    }
2002

    
2003
    return walk_memory_regions_end(&data, 0, 0);
2004
}
2005

    
2006
static int dump_region(void *priv, abi_ulong start,
2007
    abi_ulong end, unsigned long prot)
2008
{
2009
    FILE *f = (FILE *)priv;
2010

    
2011
    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2012
        " "TARGET_ABI_FMT_lx" %c%c%c\n",
2013
        start, end, end - start,
2014
        ((prot & PAGE_READ) ? 'r' : '-'),
2015
        ((prot & PAGE_WRITE) ? 'w' : '-'),
2016
        ((prot & PAGE_EXEC) ? 'x' : '-'));
2017

    
2018
    return (0);
2019
}
2020

    
2021
/* dump memory mappings */
2022
void page_dump(FILE *f)
2023
{
2024
    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2025
            "start", "end", "size", "prot");
2026
    walk_memory_regions(f, dump_region);
2027
}
2028

    
2029
int page_get_flags(target_ulong address)
2030
{
2031
    PageDesc *p;
2032

    
2033
    p = page_find(address >> TARGET_PAGE_BITS);
2034
    if (!p)
2035
        return 0;
2036
    return p->flags;
2037
}
2038

    
2039
/* Modify the flags of a page and invalidate the code if necessary.
2040
   The flag PAGE_WRITE_ORG is positioned automatically depending
2041
   on PAGE_WRITE.  The mmap_lock should already be held.  */
2042
void page_set_flags(target_ulong start, target_ulong end, int flags)
2043
{
2044
    target_ulong addr, len;
2045

    
2046
    /* This function should never be called with addresses outside the
2047
       guest address space.  If this assert fires, it probably indicates
2048
       a missing call to h2g_valid.  */
2049
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2050
    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2051
#endif
2052
    assert(start < end);
2053

    
2054
    start = start & TARGET_PAGE_MASK;
2055
    end = TARGET_PAGE_ALIGN(end);
2056

    
2057
    if (flags & PAGE_WRITE) {
2058
        flags |= PAGE_WRITE_ORG;
2059
    }
2060

    
2061
    for (addr = start, len = end - start;
2062
         len != 0;
2063
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2064
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2065

    
2066
        /* If the write protection bit is set, then we invalidate
2067
           the code inside.  */
2068
        if (!(p->flags & PAGE_WRITE) &&
2069
            (flags & PAGE_WRITE) &&
2070
            p->first_tb) {
2071
            tb_invalidate_phys_page(addr, 0, NULL);
2072
        }
2073
        p->flags = flags;
2074
    }
2075
}
2076

    
2077
int page_check_range(target_ulong start, target_ulong len, int flags)
2078
{
2079
    PageDesc *p;
2080
    target_ulong end;
2081
    target_ulong addr;
2082

    
2083
    /* This function should never be called with addresses outside the
2084
       guest address space.  If this assert fires, it probably indicates
2085
       a missing call to h2g_valid.  */
2086
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2087
    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2088
#endif
2089

    
2090
    if (len == 0) {
2091
        return 0;
2092
    }
2093
    if (start + len - 1 < start) {
2094
        /* We've wrapped around.  */
2095
        return -1;
2096
    }
2097

    
2098
    end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
    start = start & TARGET_PAGE_MASK;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        p = page_find(addr >> TARGET_PAGE_BITS);
        if (!p)
            return -1;
        if (!(p->flags & PAGE_VALID))
            return -1;
2109

    
2110
        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2111
            return -1;
2112
        if (flags & PAGE_WRITE) {
2113
            if (!(p->flags & PAGE_WRITE_ORG))
2114
                return -1;
2115
            /* unprotect the page if it was put read-only because it
2116
               contains translated code */
2117
            if (!(p->flags & PAGE_WRITE)) {
2118
                if (!page_unprotect(addr, 0, NULL))
2119
                    return -1;
2120
            }
2121
            return 0;
2122
        }
2123
    }
2124
    return 0;
2125
}
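
/*
 * Illustrative sketch (not part of the build): user-mode callers (e.g.
 * syscall emulation) use page_check_range() to validate a guest buffer
 * before touching it.  The helper name is hypothetical.
 */
#if 0
static int example_buffer_is_readable(target_ulong guest_addr, target_ulong len)
{
    return page_check_range(guest_addr, len, PAGE_READ) == 0;
}
#endif
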
/* called from signal handler: invalidate the code and unprotect the
2128
   page. Return TRUE if the fault was successfully handled. */
2129
int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2130
{
2131
    unsigned int prot;
2132
    PageDesc *p;
2133
    target_ulong host_start, host_end, addr;
2134

    
2135
    /* Technically this isn't safe inside a signal handler.  However we
2136
       know this only ever happens in a synchronous SEGV handler, so in
2137
       practice it seems to be ok.  */
2138
    mmap_lock();
2139

    
2140
    p = page_find(address >> TARGET_PAGE_BITS);
2141
    if (!p) {
2142
        mmap_unlock();
2143
        return 0;
2144
    }
2145

    
2146
    /* if the page was really writable, then we change its
2147
       protection back to writable */
2148
    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2149
        host_start = address & qemu_host_page_mask;
2150
        host_end = host_start + qemu_host_page_size;
2151

    
2152
        prot = 0;
2153
        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2154
            p = page_find(addr >> TARGET_PAGE_BITS);
2155
            p->flags |= PAGE_WRITE;
2156
            prot |= p->flags;
2157

    
2158
            /* and since the content will be modified, we must invalidate
2159
               the corresponding translated code. */
2160
            tb_invalidate_phys_page(addr, pc, puc);
2161
#ifdef DEBUG_TB_CHECK
2162
            tb_invalidate_check(addr);
2163
#endif
2164
        }
2165
        mprotect((void *)g2h(host_start), qemu_host_page_size,
2166
                 prot & PAGE_BITS);
2167

    
2168
        mmap_unlock();
2169
        return 1;
2170
    }
2171
    mmap_unlock();
2172
    return 0;
2173
}
2174
#endif /* defined(CONFIG_USER_ONLY) */
2175

    
2176
#if !defined(CONFIG_USER_ONLY)
2177

    
2178
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2179
typedef struct subpage_t {
2180
    MemoryRegion iomem;
2181
    target_phys_addr_t base;
2182
    uint16_t sub_section[TARGET_PAGE_SIZE];
2183
} subpage_t;
2184

    
2185
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2186
                             uint16_t section);
2187
static subpage_t *subpage_init(target_phys_addr_t base);
2188
static void destroy_page_desc(uint16_t section_index)
2189
{
2190
    MemoryRegionSection *section = &phys_sections[section_index];
2191
    MemoryRegion *mr = section->mr;
2192

    
2193
    if (mr->subpage) {
2194
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
2195
        memory_region_destroy(&subpage->iomem);
2196
        g_free(subpage);
2197
    }
2198
}
2199

    
2200
static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2201
{
2202
    unsigned i;
2203
    PhysPageEntry *p;
2204

    
2205
    if (lp->ptr == PHYS_MAP_NODE_NIL) {
2206
        return;
2207
    }
2208

    
2209
    p = phys_map_nodes[lp->ptr];
2210
    for (i = 0; i < L2_SIZE; ++i) {
2211
        if (!p[i].is_leaf) {
2212
            destroy_l2_mapping(&p[i], level - 1);
2213
        } else {
2214
            destroy_page_desc(p[i].ptr);
2215
        }
2216
    }
2217
    lp->is_leaf = 0;
2218
    lp->ptr = PHYS_MAP_NODE_NIL;
2219
}
2220

    
2221
static void destroy_all_mappings(void)
2222
{
2223
    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2224
    phys_map_nodes_reset();
2225
}
2226

    
2227
static uint16_t phys_section_add(MemoryRegionSection *section)
2228
{
2229
    if (phys_sections_nb == phys_sections_nb_alloc) {
2230
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2231
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
2232
                                phys_sections_nb_alloc);
2233
    }
2234
    phys_sections[phys_sections_nb] = *section;
2235
    return phys_sections_nb++;
2236
}
2237

    
2238
static void phys_sections_clear(void)
2239
{
2240
    phys_sections_nb = 0;
2241
}
2242

    
2243
/* Register a physical memory section.  Sections that do not start or end on
   a target page boundary are routed through a subpage MemoryRegion; the
   page-aligned middle part is entered directly into the physical page map
   (see cpu_register_physical_memory_log() below).  */
2251
static void register_subpage(MemoryRegionSection *section)
2252
{
2253
    subpage_t *subpage;
2254
    target_phys_addr_t base = section->offset_within_address_space
2255
        & TARGET_PAGE_MASK;
2256
    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2257
    MemoryRegionSection subsection = {
2258
        .offset_within_address_space = base,
2259
        .size = TARGET_PAGE_SIZE,
2260
    };
2261
    target_phys_addr_t start, end;
2262

    
2263
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2264

    
2265
    if (!(existing->mr->subpage)) {
2266
        subpage = subpage_init(base);
2267
        subsection.mr = &subpage->iomem;
2268
        phys_page_set(base >> TARGET_PAGE_BITS, 1,
2269
                      phys_section_add(&subsection));
2270
    } else {
2271
        subpage = container_of(existing->mr, subpage_t, iomem);
2272
    }
2273
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2274
    end = start + section->size - 1;
2275
    subpage_register(subpage, start, end, phys_section_add(section));
2276
}
2277

    
2278

    
2279
static void register_multipage(MemoryRegionSection *section)
2280
{
2281
    target_phys_addr_t start_addr = section->offset_within_address_space;
2282
    ram_addr_t size = section->size;
2283
    target_phys_addr_t addr;
2284
    uint16_t section_index = phys_section_add(section);
2285

    
2286
    assert(size);
2287

    
2288
    addr = start_addr;
2289
    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2290
                  section_index);
2291
}
2292

    
2293
void cpu_register_physical_memory_log(MemoryRegionSection *section,
2294
                                      bool readonly)
2295
{
2296
    MemoryRegionSection now = *section, remain = *section;
2297

    
2298
    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2299
        || (now.size < TARGET_PAGE_SIZE)) {
2300
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2301
                       - now.offset_within_address_space,
2302
                       now.size);
2303
        register_subpage(&now);
2304
        remain.size -= now.size;
2305
        remain.offset_within_address_space += now.size;
2306
        remain.offset_within_region += now.size;
2307
    }
2308
    now = remain;
2309
    now.size &= TARGET_PAGE_MASK;
2310
    if (now.size) {
2311
        register_multipage(&now);
2312
        remain.size -= now.size;
2313
        remain.offset_within_address_space += now.size;
2314
        remain.offset_within_region += now.size;
2315
    }
2316
    now = remain;
2317
    if (now.size) {
2318
        register_subpage(&now);
2319
    }
2320
}
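
/*
 * Worked example (assuming 4 KiB target pages): a section that starts at
 * address 0x1800 with size 0x2400 is registered by
 * cpu_register_physical_memory_log() in three steps:
 *
 *   1. head:  0x1800-0x1fff (0x800 bytes)  -> register_subpage()
 *   2. body:  0x2000-0x2fff (0x1000 bytes) -> register_multipage()
 *   3. tail:  0x3000-0x3bff (0xc00 bytes)  -> register_subpage()
 *
 * The addresses above are illustrative only.
 */
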
void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2324
{
2325
    if (kvm_enabled())
2326
        kvm_coalesce_mmio_region(addr, size);
2327
}
2328

    
2329
void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2330
{
2331
    if (kvm_enabled())
2332
        kvm_uncoalesce_mmio_region(addr, size);
2333
}
2334

    
2335
void qemu_flush_coalesced_mmio_buffer(void)
2336
{
2337
    if (kvm_enabled())
2338
        kvm_flush_coalesced_mmio_buffer();
2339
}
2340

    
2341
#if defined(__linux__) && !defined(TARGET_S390X)
2342

    
2343
#include <sys/vfs.h>
2344

    
2345
#define HUGETLBFS_MAGIC       0x958458f6
2346

    
2347
static long gethugepagesize(const char *path)
2348
{
2349
    struct statfs fs;
2350
    int ret;
2351

    
2352
    do {
2353
        ret = statfs(path, &fs);
2354
    } while (ret != 0 && errno == EINTR);
2355

    
2356
    if (ret != 0) {
2357
        perror(path);
2358
        return 0;
2359
    }
2360

    
2361
    if (fs.f_type != HUGETLBFS_MAGIC)
2362
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2363

    
2364
    return fs.f_bsize;
2365
}
2366

    
2367
static void *file_ram_alloc(RAMBlock *block,
2368
                            ram_addr_t memory,
2369
                            const char *path)
2370
{
2371
    char *filename;
2372
    void *area;
2373
    int fd;
2374
#ifdef MAP_POPULATE
2375
    int flags;
2376
#endif
2377
    unsigned long hpagesize;
2378

    
2379
    hpagesize = gethugepagesize(path);
2380
    if (!hpagesize) {
2381
        return NULL;
2382
    }
2383

    
2384
    if (memory < hpagesize) {
2385
        return NULL;
2386
    }
2387

    
2388
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
2389
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2390
        return NULL;
2391
    }
2392

    
2393
    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2394
        return NULL;
2395
    }
2396

    
2397
    fd = mkstemp(filename);
2398
    if (fd < 0) {
2399
        perror("unable to create backing store for hugepages");
2400
        free(filename);
2401
        return NULL;
2402
    }
2403
    unlink(filename);
2404
    free(filename);
2405

    
2406
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
2407

    
2408
    /*
2409
     * ftruncate is not supported by hugetlbfs in older
2410
     * hosts, so don't bother bailing out on errors.
2411
     * If anything goes wrong with it under other filesystems,
2412
     * mmap will fail.
2413
     */
2414
    if (ftruncate(fd, memory))
2415
        perror("ftruncate");
2416

    
2417
#ifdef MAP_POPULATE
2418
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2419
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
2420
     * to sidestep this quirk.
2421
     */
2422
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2423
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2424
#else
2425
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2426
#endif
2427
    if (area == MAP_FAILED) {
2428
        perror("file_ram_alloc: can't mmap RAM pages");
2429
        close(fd);
2430
        return (NULL);
2431
    }
2432
    block->fd = fd;
2433
    return area;
2434
}
2435
#endif
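
/*
 * Usage note (illustrative): file_ram_alloc() backs guest RAM with files on a
 * hugetlbfs mount when the user passes -mem-path, roughly:
 *
 *   mount -t hugetlbfs hugetlbfs /dev/hugepages
 *   qemu-system-x86_64 -m 4096 -mem-path /dev/hugepages ...
 *
 * The mount point and command line are examples only.  A path that is not on
 * hugetlbfs merely triggers the warning in gethugepagesize(); if
 * file_ram_alloc() fails altogether, qemu_ram_alloc_from_ptr() below falls
 * back to the normal allocator.
 */
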
static ram_addr_t find_ram_offset(ram_addr_t size)
2438
{
2439
    RAMBlock *block, *next_block;
2440
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2441

    
2442
    if (QLIST_EMPTY(&ram_list.blocks))
2443
        return 0;
2444

    
2445
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2446
        ram_addr_t end, next = RAM_ADDR_MAX;
2447

    
2448
        end = block->offset + block->length;
2449

    
2450
        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2451
            if (next_block->offset >= end) {
2452
                next = MIN(next, next_block->offset);
2453
            }
2454
        }
2455
        if (next - end >= size && next - end < mingap) {
2456
            offset = end;
2457
            mingap = next - end;
2458
        }
2459
    }
2460

    
2461
    if (offset == RAM_ADDR_MAX) {
2462
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2463
                (uint64_t)size);
2464
        abort();
2465
    }
2466

    
2467
    return offset;
2468
}
2469

    
2470
static ram_addr_t last_ram_offset(void)
2471
{
2472
    RAMBlock *block;
2473
    ram_addr_t last = 0;
2474

    
2475
    QLIST_FOREACH(block, &ram_list.blocks, next)
2476
        last = MAX(last, block->offset + block->length);
2477

    
2478
    return last;
2479
}
2480

    
2481
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2482
{
2483
    RAMBlock *new_block, *block;
2484

    
2485
    new_block = NULL;
2486
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2487
        if (block->offset == addr) {
2488
            new_block = block;
2489
            break;
2490
        }
2491
    }
2492
    assert(new_block);
2493
    assert(!new_block->idstr[0]);
2494

    
2495
    if (dev) {
2496
        char *id = qdev_get_dev_path(dev);
2497
        if (id) {
2498
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2499
            g_free(id);
2500
        }
2501
    }
2502
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2503

    
2504
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2505
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2506
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2507
                    new_block->idstr);
2508
            abort();
2509
        }
2510
    }
2511
}
2512

    
2513
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2514
                                   MemoryRegion *mr)
2515
{
2516
    RAMBlock *new_block;
2517

    
2518
    size = TARGET_PAGE_ALIGN(size);
2519
    new_block = g_malloc0(sizeof(*new_block));
2520

    
2521
    new_block->mr = mr;
2522
    new_block->offset = find_ram_offset(size);
2523
    if (host) {
2524
        new_block->host = host;
2525
        new_block->flags |= RAM_PREALLOC_MASK;
2526
    } else {
2527
        if (mem_path) {
2528
#if defined (__linux__) && !defined(TARGET_S390X)
2529
            new_block->host = file_ram_alloc(new_block, size, mem_path);
2530
            if (!new_block->host) {
2531
                new_block->host = qemu_vmalloc(size);
2532
                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2533
            }
2534
#else
2535
            fprintf(stderr, "-mem-path option unsupported\n");
2536
            exit(1);
2537
#endif
2538
        } else {
2539
            if (xen_enabled()) {
2540
                xen_ram_alloc(new_block->offset, size, mr);
2541
            } else if (kvm_enabled()) {
2542
                /* some s390/kvm configurations have special constraints */
2543
                new_block->host = kvm_vmalloc(size);
2544
            } else {
2545
                new_block->host = qemu_vmalloc(size);
2546
            }
2547
            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2548
        }
2549
    }
2550
    new_block->length = size;
2551

    
2552
    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2553

    
2554
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2555
                                       last_ram_offset() >> TARGET_PAGE_BITS);
2556
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2557

    
2558
    if (kvm_enabled())
2559
        kvm_setup_guest_memory(new_block->host, size);
2560

    
2561
    return new_block->offset;
2562
}
2563

    
2564
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2565
{
2566
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
2567
}
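
/*
 * Illustrative sketch (not part of the build): board code normally does not
 * call qemu_ram_alloc() directly; it creates a RAM MemoryRegion, which
 * allocates through this function, and maps it into the system address
 * space.  The helper name, region name and size are hypothetical, and
 * memory_region_init_ram() is assumed to take (region, name, size) here.
 */
#if 0
static void example_add_ram(void)
{
    static MemoryRegion ram;

    memory_region_init_ram(&ram, "example.ram", 128 * 1024 * 1024);
    memory_region_add_subregion(get_system_memory(), 0, &ram);
}
#endif
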
void qemu_ram_free_from_ptr(ram_addr_t addr)
2570
{
2571
    RAMBlock *block;
2572

    
2573
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2574
        if (addr == block->offset) {
2575
            QLIST_REMOVE(block, next);
2576
            g_free(block);
2577
            return;
2578
        }
2579
    }
2580
}
2581

    
2582
void qemu_ram_free(ram_addr_t addr)
2583
{
2584
    RAMBlock *block;
2585

    
2586
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2587
        if (addr == block->offset) {
2588
            QLIST_REMOVE(block, next);
2589
            if (block->flags & RAM_PREALLOC_MASK) {
2590
                ;
2591
            } else if (mem_path) {
2592
#if defined (__linux__) && !defined(TARGET_S390X)
2593
                if (block->fd) {
2594
                    munmap(block->host, block->length);
2595
                    close(block->fd);
2596
                } else {
2597
                    qemu_vfree(block->host);
2598
                }
2599
#else
2600
                abort();
2601
#endif
2602
            } else {
2603
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2604
                munmap(block->host, block->length);
2605
#else
2606
                if (xen_enabled()) {
2607
                    xen_invalidate_map_cache_entry(block->host);
2608
                } else {
2609
                    qemu_vfree(block->host);
2610
                }
2611
#endif
2612
            }
2613
            g_free(block);
2614
            return;
2615
        }
2616
    }
2617

    
2618
}
2619

    
2620
#ifndef _WIN32
2621
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2622
{
2623
    RAMBlock *block;
2624
    ram_addr_t offset;
2625
    int flags;
2626
    void *area, *vaddr;
2627

    
2628
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2629
        offset = addr - block->offset;
2630
        if (offset < block->length) {
2631
            vaddr = block->host + offset;
2632
            if (block->flags & RAM_PREALLOC_MASK) {
2633
                ;
2634
            } else {
2635
                flags = MAP_FIXED;
2636
                munmap(vaddr, length);
2637
                if (mem_path) {
2638
#if defined(__linux__) && !defined(TARGET_S390X)
2639
                    if (block->fd) {
2640
#ifdef MAP_POPULATE
2641
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2642
                            MAP_PRIVATE;
2643
#else
2644
                        flags |= MAP_PRIVATE;
2645
#endif
2646
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2647
                                    flags, block->fd, offset);
2648
                    } else {
2649
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2650
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2651
                                    flags, -1, 0);
2652
                    }
2653
#else
2654
                    abort();
2655
#endif
2656
                } else {
2657
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2658
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
2659
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2660
                                flags, -1, 0);
2661
#else
2662
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2663
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2664
                                flags, -1, 0);
2665
#endif
2666
                }
2667
                if (area != vaddr) {
2668
                    fprintf(stderr, "Could not remap addr: "
2669
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2670
                            length, addr);
2671
                    exit(1);
2672
                }
2673
                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
2674
            }
2675
            return;
2676
        }
2677
    }
2678
}
2679
#endif /* !_WIN32 */
2680

    
2681
/* Return a host pointer to ram allocated with qemu_ram_alloc.
2682
   With the exception of the softmmu code in this file, this should
2683
   only be used for local memory (e.g. video ram) that the device owns,
2684
   and knows it isn't going to access beyond the end of the block.
2685

2686
   It should not be used for general purpose DMA.
2687
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2688
 */
2689
void *qemu_get_ram_ptr(ram_addr_t addr)
2690
{
2691
    RAMBlock *block;
2692

    
2693
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2694
        if (addr - block->offset < block->length) {
2695
            /* Move this entry to the start of the list.  */
2696
            if (block != QLIST_FIRST(&ram_list.blocks)) {
2697
                QLIST_REMOVE(block, next);
2698
                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2699
            }
2700
            if (xen_enabled()) {
2701
                /* We need to check if the requested address is in the RAM
2702
                 * because we don't want to map the entire memory in QEMU.
2703
                 * In that case just map until the end of the page.
2704
                 */
2705
                if (block->offset == 0) {
2706
                    return xen_map_cache(addr, 0, 0);
2707
                } else if (block->host == NULL) {
2708
                    block->host =
2709
                        xen_map_cache(block->offset, block->length, 1);
2710
                }
2711
            }
2712
            return block->host + (addr - block->offset);
2713
        }
2714
    }
2715

    
2716
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2717
    abort();
2718

    
2719
    return NULL;
2720
}
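
/*
 * Illustrative sketch (not part of the build): as the comment above says,
 * device DMA should go through cpu_physical_memory_rw() (defined later in
 * this file) rather than holding raw pointers from qemu_get_ram_ptr().
 * The helper name and parameters are hypothetical.
 */
#if 0
static void example_dma_write(target_phys_addr_t guest_paddr,
                              const uint8_t *buf, int len)
{
    cpu_physical_memory_rw(guest_paddr, (uint8_t *)buf, len, 1 /* is_write */);
}
#endif
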
/* Return a host pointer to ram allocated with qemu_ram_alloc.
2723
 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2724
 */
2725
void *qemu_safe_ram_ptr(ram_addr_t addr)
2726
{
2727
    RAMBlock *block;
2728

    
2729
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2730
        if (addr - block->offset < block->length) {
2731
            if (xen_enabled()) {
2732
                /* We need to check if the requested address is in the RAM
2733
                 * because we don't want to map the entire memory in QEMU.
2734
                 * In that case just map until the end of the page.
2735
                 */
2736
                if (block->offset == 0) {
2737
                    return xen_map_cache(addr, 0, 0);
2738
                } else if (block->host == NULL) {
2739
                    block->host =
2740
                        xen_map_cache(block->offset, block->length, 1);
2741
                }
2742
            }
2743
            return block->host + (addr - block->offset);
2744
        }
2745
    }
2746

    
2747
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2748
    abort();
2749

    
2750
    return NULL;
2751
}
2752

    
2753
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2754
 * but takes a size argument */
2755
void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2756
{
2757
    if (*size == 0) {
2758
        return NULL;
2759
    }
2760
    if (xen_enabled()) {
2761
        return xen_map_cache(addr, *size, 1);
2762
    } else {
2763
        RAMBlock *block;
2764

    
2765
        QLIST_FOREACH(block, &ram_list.blocks, next) {
2766
            if (addr - block->offset < block->length) {
2767
                if (addr - block->offset + *size > block->length)
2768
                    *size = block->length - addr + block->offset;
2769
                return block->host + (addr - block->offset);
2770
            }
2771
        }
2772

    
2773
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2774
        abort();
2775
    }
2776
}
2777

    
2778
void qemu_put_ram_ptr(void *addr)
2779
{
2780
    trace_qemu_put_ram_ptr(addr);
2781
}
2782

    
2783
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2784
{
2785
    RAMBlock *block;
2786
    uint8_t *host = ptr;
2787

    
2788
    if (xen_enabled()) {
2789
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
2790
        return 0;
2791
    }
2792

    
2793
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2794
        /* This case happens when the block is not mapped. */
2795
        if (block->host == NULL) {
2796
            continue;
2797
        }
2798
        if (host - block->host < block->length) {
2799
            *ram_addr = block->offset + (host - block->host);
2800
            return 0;
2801
        }
2802
    }
2803

    
2804
    return -1;
2805
}
2806

    
2807
/* Some of the softmmu routines need to translate from a host pointer
2808
   (typically a TLB entry) back to a ram offset.  */
2809
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2810
{
2811
    ram_addr_t ram_addr;
2812

    
2813
    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2814
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
2815
        abort();
2816
    }
2817
    return ram_addr;
2818
}
2819

    
2820
static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2821
                                    unsigned size)
2822
{
2823
#ifdef DEBUG_UNASSIGNED
2824
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2825
#endif
2826
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2827
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2828
#endif
2829
    return 0;
2830
}
2831

    
2832
static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2833
                                 uint64_t val, unsigned size)
2834
{
2835
#ifdef DEBUG_UNASSIGNED
2836
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2837
#endif
2838
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2839
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2840
#endif
2841
}
2842

    
2843
static const MemoryRegionOps unassigned_mem_ops = {
2844
    .read = unassigned_mem_read,
2845
    .write = unassigned_mem_write,
2846
    .endianness = DEVICE_NATIVE_ENDIAN,
2847
};
2848

    
2849
static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2850
                               unsigned size)
2851
{
2852
    abort();
2853
}
2854

    
2855
static void error_mem_write(void *opaque, target_phys_addr_t addr,
2856
                            uint64_t value, unsigned size)
2857
{
2858
    abort();
2859
}
2860

    
2861
static const MemoryRegionOps error_mem_ops = {
2862
    .read = error_mem_read,
2863
    .write = error_mem_write,
2864
    .endianness = DEVICE_NATIVE_ENDIAN,
2865
};
2866

    
2867
static const MemoryRegionOps rom_mem_ops = {
2868
    .read = error_mem_read,
2869
    .write = unassigned_mem_write,
2870
    .endianness = DEVICE_NATIVE_ENDIAN,
2871
};
2872

    
2873
static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2874
                               uint64_t val, unsigned size)
2875
{
2876
    int dirty_flags;
2877
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2878
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2879
#if !defined(CONFIG_USER_ONLY)
2880
        tb_invalidate_phys_page_fast(ram_addr, size);
2881
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2882
#endif
2883
    }
2884
    switch (size) {
2885
    case 1:
2886
        stb_p(qemu_get_ram_ptr(ram_addr), val);
2887
        break;
2888
    case 2:
2889
        stw_p(qemu_get_ram_ptr(ram_addr), val);
2890
        break;
2891
    case 4:
2892
        stl_p(qemu_get_ram_ptr(ram_addr), val);
2893
        break;
2894
    default:
2895
        abort();
2896
    }
2897
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2898
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2899
    /* we remove the notdirty callback only if the code has been
2900
       flushed */
2901
    if (dirty_flags == 0xff)
2902
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2903
}
2904

    
2905
static const MemoryRegionOps notdirty_mem_ops = {
2906
    .read = error_mem_read,
2907
    .write = notdirty_mem_write,
2908
    .endianness = DEVICE_NATIVE_ENDIAN,
2909
};
2910

    
2911
/* Generate a debug exception if a watchpoint has been hit.  */
2912
static void check_watchpoint(int offset, int len_mask, int flags)
2913
{
2914
    CPUArchState *env = cpu_single_env;
2915
    target_ulong pc, cs_base;
2916
    TranslationBlock *tb;
2917
    target_ulong vaddr;
2918
    CPUWatchpoint *wp;
2919
    int cpu_flags;
2920

    
2921
    if (env->watchpoint_hit) {
2922
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
2925
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2926
        return;
2927
    }
2928
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2929
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2930
        if ((vaddr == (wp->vaddr & len_mask) ||
2931
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2932
            wp->flags |= BP_WATCHPOINT_HIT;
2933
            if (!env->watchpoint_hit) {
2934
                env->watchpoint_hit = wp;
2935
                tb = tb_find_pc(env->mem_io_pc);
2936
                if (!tb) {
2937
                    cpu_abort(env, "check_watchpoint: could not find TB for "
2938
                              "pc=%p", (void *)env->mem_io_pc);
2939
                }
2940
                cpu_restore_state(tb, env, env->mem_io_pc);
2941
                tb_phys_invalidate(tb, -1);
2942
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2943
                    env->exception_index = EXCP_DEBUG;
2944
                    cpu_loop_exit(env);
2945
                } else {
2946
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2947
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2948
                    cpu_resume_from_signal(env, NULL);
2949
                }
2950
            }
2951
        } else {
2952
            wp->flags &= ~BP_WATCHPOINT_HIT;
2953
        }
2954
    }
2955
}
2956

    
2957
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2958
   so these check for a hit then pass through to the normal out-of-line
2959
   phys routines.  */
2960
static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
2961
                               unsigned size)
2962
{
2963
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2964
    switch (size) {
2965
    case 1: return ldub_phys(addr);
2966
    case 2: return lduw_phys(addr);
2967
    case 4: return ldl_phys(addr);
2968
    default: abort();
2969
    }
2970
}
2971

    
2972
static void watch_mem_write(void *opaque, target_phys_addr_t addr,
2973
                            uint64_t val, unsigned size)
2974
{
2975
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2976
    switch (size) {
2977
    case 1:
2978
        stb_phys(addr, val);
2979
        break;
2980
    case 2:
2981
        stw_phys(addr, val);
2982
        break;
2983
    case 4:
2984
        stl_phys(addr, val);
2985
        break;
2986
    default: abort();
2987
    }
2988
}
2989

    
2990
static const MemoryRegionOps watch_mem_ops = {
2991
    .read = watch_mem_read,
2992
    .write = watch_mem_write,
2993
    .endianness = DEVICE_NATIVE_ENDIAN,
2994
};
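
/*
 * Illustrative sketch (not part of the build): the TLB trick described above
 * is armed by cpu_watchpoint_insert(); a debugger front end might use it
 * roughly as follows.  The helper name, length and flags are examples.
 */
#if 0
static int example_add_write_watchpoint(CPUArchState *env, target_ulong vaddr)
{
    /* Watch 4 bytes at vaddr for guest writes. */
    return cpu_watchpoint_insert(env, vaddr, 4, BP_MEM_WRITE | BP_GDB, NULL);
}
#endif
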
static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
2997
                             unsigned len)
2998
{
2999
    subpage_t *mmio = opaque;
3000
    unsigned int idx = SUBPAGE_IDX(addr);
3001
    MemoryRegionSection *section;
3002
#if defined(DEBUG_SUBPAGE)
3003
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3004
           mmio, len, addr, idx);
3005
#endif
3006

    
3007
    section = &phys_sections[mmio->sub_section[idx]];
3008
    addr += mmio->base;
3009
    addr -= section->offset_within_address_space;
3010
    addr += section->offset_within_region;
3011
    return io_mem_read(section->mr, addr, len);
3012
}
3013

    
3014
static void subpage_write(void *opaque, target_phys_addr_t addr,
3015
                          uint64_t value, unsigned len)
3016
{
3017
    subpage_t *mmio = opaque;
3018
    unsigned int idx = SUBPAGE_IDX(addr);
3019
    MemoryRegionSection *section;
3020
#if defined(DEBUG_SUBPAGE)
3021
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3022
           " idx %d value %"PRIx64"\n",
3023
           __func__, mmio, len, addr, idx, value);
3024
#endif
3025

    
3026
    section = &phys_sections[mmio->sub_section[idx]];
3027
    addr += mmio->base;
3028
    addr -= section->offset_within_address_space;
3029
    addr += section->offset_within_region;
3030
    io_mem_write(section->mr, addr, value, len);
3031
}
3032

    
3033
static const MemoryRegionOps subpage_ops = {
3034
    .read = subpage_read,
3035
    .write = subpage_write,
3036
    .endianness = DEVICE_NATIVE_ENDIAN,
3037
};
3038

    
3039
static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3040
                                 unsigned size)
3041
{
3042
    ram_addr_t raddr = addr;
3043
    void *ptr = qemu_get_ram_ptr(raddr);
3044
    switch (size) {
3045
    case 1: return ldub_p(ptr);
3046
    case 2: return lduw_p(ptr);
3047
    case 4: return ldl_p(ptr);
3048
    default: abort();
3049
    }
3050
}
3051

    
3052
static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3053
                              uint64_t value, unsigned size)
3054
{
3055
    ram_addr_t raddr = addr;
3056
    void *ptr = qemu_get_ram_ptr(raddr);
3057
    switch (size) {
3058
    case 1: return stb_p(ptr, value);
3059
    case 2: return stw_p(ptr, value);
3060
    case 4: return stl_p(ptr, value);
3061
    default: abort();
3062
    }
3063
}
3064

    
3065
static const MemoryRegionOps subpage_ram_ops = {
3066
    .read = subpage_ram_read,
3067
    .write = subpage_ram_write,
3068
    .endianness = DEVICE_NATIVE_ENDIAN,
3069
};
3070

    
3071
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3072
                             uint16_t section)
3073
{
3074
    int idx, eidx;
3075

    
3076
    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3077
        return -1;
3078
    idx = SUBPAGE_IDX(start);
3079
    eidx = SUBPAGE_IDX(end);
3080
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
3084
    if (memory_region_is_ram(phys_sections[section].mr)) {
3085
        MemoryRegionSection new_section = phys_sections[section];
3086
        new_section.mr = &io_mem_subpage_ram;
3087
        section = phys_section_add(&new_section);
3088
    }
3089
    for (; idx <= eidx; idx++) {
3090
        mmio->sub_section[idx] = section;
3091
    }
3092

    
3093
    return 0;
3094
}
3095

    
3096
static subpage_t *subpage_init(target_phys_addr_t base)
3097
{
3098
    subpage_t *mmio;
3099

    
3100
    mmio = g_malloc0(sizeof(subpage_t));
3101

    
3102
    mmio->base = base;
3103
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3104
                          "subpage", TARGET_PAGE_SIZE);
3105
    mmio->iomem.subpage = true;
3106
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
3110
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3111

    
3112
    return mmio;
3113
}
3114

    
3115
static uint16_t dummy_section(MemoryRegion *mr)
3116
{
3117
    MemoryRegionSection section = {
3118
        .mr = mr,
3119
        .offset_within_address_space = 0,
3120
        .offset_within_region = 0,
3121
        .size = UINT64_MAX,
3122
    };
3123

    
3124
    return phys_section_add(&section);
3125
}
3126

    
3127
MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3128
{
3129
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3130
}
3131

    
3132
static void io_mem_init(void)
3133
{
3134
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3135
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3136
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3137
                          "unassigned", UINT64_MAX);
3138
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3139
                          "notdirty", UINT64_MAX);
3140
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3141
                          "subpage-ram", UINT64_MAX);
3142
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3143
                          "watch", UINT64_MAX);
3144
}
3145

    
3146
static void core_begin(MemoryListener *listener)
3147
{
3148
    destroy_all_mappings();
3149
    phys_sections_clear();
3150
    phys_map.ptr = PHYS_MAP_NODE_NIL;
3151
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
3152
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
3153
    phys_section_rom = dummy_section(&io_mem_rom);
3154
    phys_section_watch = dummy_section(&io_mem_watch);
3155
}
3156

    
3157
static void core_commit(MemoryListener *listener)
3158
{
3159
    CPUArchState *env;
3160

    
3161
    /* since each CPU stores ram addresses in its TLB cache, we must
3162
       reset the modified entries */
3163
    /* XXX: slow ! */
3164
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
3165
        tlb_flush(env, 1);
3166
    }
3167
}
3168

    
3169
static void core_region_add(MemoryListener *listener,
3170
                            MemoryRegionSection *section)
3171
{
3172
    cpu_register_physical_memory_log(section, section->readonly);
3173
}
3174

    
3175
static void core_region_del(MemoryListener *listener,
3176
                            MemoryRegionSection *section)
3177
{
3178
}
3179

    
3180
static void core_region_nop(MemoryListener *listener,
3181
                            MemoryRegionSection *section)
3182
{
3183
    cpu_register_physical_memory_log(section, section->readonly);
3184
}
3185

    
3186
static void core_log_start(MemoryListener *listener,
3187
                           MemoryRegionSection *section)
{
}

static void core_log_stop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void core_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static void core_eventfd_add(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool match_data, uint64_t data, EventNotifier *e)
{
}

static void core_eventfd_del(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool match_data, uint64_t data, EventNotifier *e)
{
}

static void io_begin(MemoryListener *listener)
{
}

static void io_commit(MemoryListener *listener)
{
}

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

static void io_region_nop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void io_log_start(MemoryListener *listener,
                         MemoryRegionSection *section)
{
}

static void io_log_stop(MemoryListener *listener,
                        MemoryRegionSection *section)
{
}

static void io_log_sync(MemoryListener *listener,
                        MemoryRegionSection *section)
{
}

static void io_log_global_start(MemoryListener *listener)
{
}

static void io_log_global_stop(MemoryListener *listener)
{
}

static void io_eventfd_add(MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool match_data, uint64_t data, EventNotifier *e)
{
}

static void io_eventfd_del(MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool match_data, uint64_t data, EventNotifier *e)
{
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .region_add = core_region_add,
    .region_del = core_region_del,
    .region_nop = core_region_nop,
    .log_start = core_log_start,
    .log_stop = core_log_stop,
    .log_sync = core_log_sync,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .eventfd_add = core_eventfd_add,
    .eventfd_del = core_eventfd_del,
    .priority = 0,
};

static MemoryListener io_memory_listener = {
    .begin = io_begin,
    .commit = io_commit,
    .region_add = io_region_add,
    .region_del = io_region_del,
    .region_nop = io_region_nop,
    .log_start = io_log_start,
    .log_stop = io_log_stop,
    .log_sync = io_log_sync,
    .log_global_start = io_log_global_start,
    .log_global_stop = io_log_global_stop,
    .eventfd_add = io_eventfd_add,
    .eventfd_del = io_eventfd_del,
    .priority = 0,
};

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    set_system_memory_map(system_memory);

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    set_system_io_map(system_io);

    memory_listener_register(&core_memory_listener, system_memory);
    memory_listener_register(&io_memory_listener, system_io);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}
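
/* Illustrative sketch (not part of the original file): a board model would
 * typically allocate a RAM region and attach it to the system address space
 * returned by get_system_memory().  The region name, guest address and size
 * below are hypothetical; exact init helpers vary between QEMU versions. */
static void example_map_main_ram(ram_addr_t size)
{
    MemoryRegion *ram = g_malloc(sizeof(*ram));

    memory_region_init_ram(ram, "example.ram", size);
    vmstate_register_ram_global(ram);
    /* map it at guest physical address 0 in the system address space */
    memory_region_add_subregion(get_system_memory(), 0, ram);
}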

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else
void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                            int len, int is_write)
{
    int l;
    uint8_t *ptr;
    uint32_t val;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (is_write) {
            if (!memory_region_is_ram(section->mr)) {
                target_phys_addr_t addr1;
                addr1 = memory_region_section_addr(section, addr);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                    l = 2;
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                    l = 1;
                }
            } else if (!section->readonly) {
                ram_addr_t addr1;
                addr1 = memory_region_get_ram_addr(section->mr)
                    + memory_region_section_addr(section, addr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                qemu_put_ram_ptr(ptr);
            }
        } else {
            if (!(memory_region_is_ram(section->mr) ||
                  memory_region_is_romd(section->mr))) {
                target_phys_addr_t addr1;
                /* I/O case */
                addr1 = memory_region_section_addr(section, addr);
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                    l = 2;
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                    l = 1;
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
                                       + memory_region_section_addr(section,
                                                                    addr));
                memcpy(buf, ptr, l);
                qemu_put_ram_ptr(ptr);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
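
/* Illustrative sketch (not part of the original file): copying a small
 * hypothetical blob into guest-physical memory and reading it back with
 * cpu_physical_memory_rw().  The address and contents are made up. */
static void example_poke_guest_memory(void)
{
    uint8_t blob[4] = { 0xde, 0xad, 0xbe, 0xef };
    uint8_t check[4];

    /* is_write = 1: copy the host buffer into the guest physical address space */
    cpu_physical_memory_rw(0x1000, blob, sizeof(blob), 1);
    /* is_write = 0: read it back into a host buffer */
    cpu_physical_memory_rw(0x1000, check, sizeof(check), 0);
}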

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(target_phys_addr_t addr,
                                   const uint8_t *buf, int len)
{
    int l;
    uint8_t *ptr;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr))) {
            /* do nothing */
        } else {
            unsigned long addr1;
            addr1 = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            qemu_put_ram_ptr(ptr);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

typedef struct {
    void *buffer;
    target_phys_addr_t addr;
    target_phys_addr_t len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *cpu_physical_memory_map(target_phys_addr_t addr,
                              target_phys_addr_t *plen,
                              int is_write)
{
    target_phys_addr_t len = *plen;
    target_phys_addr_t todo = 0;
    int l;
    target_phys_addr_t page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                cpu_physical_memory_read(addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                               int is_write, target_phys_addr_t access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}
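
/* Illustrative sketch (not part of the original file): the map/unmap pattern
 * a DMA-style caller would follow.  If a shorter span (or NULL) comes back,
 * e.g. because the single bounce buffer is busy, the caller can fall back to
 * cpu_physical_memory_rw() or register a map client with
 * cpu_register_map_client() and retry from the callback. */
static void example_dma_write(target_phys_addr_t addr, const uint8_t *data,
                              target_phys_addr_t size)
{
    target_phys_addr_t plen = size;
    void *host = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

    if (!host) {
        /* resources exhausted; a real caller would retry later */
        return;
    }
    /* plen may be smaller than size; only that much is mapped */
    memcpy(host, data, plen);
    cpu_physical_memory_unmap(host, plen, 1, plen);
}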

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(target_phys_addr_t addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
                               & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
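
/* Illustrative sketch (not part of the original file): a device or loader
 * peeking and poking guest physical memory with an explicit endianness,
 * e.g. filling in a little-endian descriptor regardless of the target's
 * native byte order.  desc_addr is hypothetical and assumed 8-byte aligned,
 * as the accessors above require aligned addresses. */
static void example_write_le_descriptor(target_phys_addr_t desc_addr,
                                        uint64_t dma_addr, uint32_t flags)
{
    stq_le_phys(desc_addr, dma_addr);      /* 64-bit little-endian store */
    stl_le_phys(desc_addr + 8, flags);     /* 32-bit little-endian store */

    /* read back with the matching little-endian loads */
    (void)ldq_le_phys(desc_addr);
    (void)ldl_le_phys(desc_addr + 8);
}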

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
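
/* Illustrative sketch (not part of the original file): how a debugger front
 * end (e.g. the gdb stub) reads guest *virtual* memory.  The per-page walk
 * through cpu_get_phys_page_debug() happens inside cpu_memory_rw_debug(),
 * so a failed translation simply shows up as a -1 return value. */
static int example_read_guest_virtual(CPUArchState *env, target_ulong vaddr,
                                      uint8_t *out, int len)
{
    return cpu_memory_rw_debug(env, vaddr, out, len, 0 /* read */);
}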
#endif

/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
            && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}

#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
            cross_page,
            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
{
    MemoryRegionSection *section;

    section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);

    return !(memory_region_is_ram(section->mr) ||
             memory_region_is_romd(section->mr));
}
#endif