/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "osdep.h"
#include "kvm.h"
#include "hw/xen.h"
#include "qemu-timer.h"
#include "memory.h"
#include "exec-memory.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/time.h>
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else /* !CONFIG_USER_ONLY */
#include "xen-mapcache.h"
#include "trace.h"
#endif

#include "cputlb.h"

#define WANT_EXEC_OBSOLETE
#include "exec-obsolete.h"

//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
//#define DEBUG_UNASSIGNED

/* make various TB consistency checks */
//#define DEBUG_TB_CHECK

//#define DEBUG_IOPORT
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation.  */
#undef DEBUG_TB_CHECK
#endif

#define SMC_BITMAP_USE_THRESHOLD 10

static TranslationBlock *tbs;
static int code_gen_max_blocks;
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;

#if defined(__arm__) || defined(__sparc_v9__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
 have limited branch ranges (possibly also PPC) so place it in a
 section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32) && !defined(_WIN64)
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif

uint8_t code_gen_prologue[1024] code_gen_section;
static uint8_t *code_gen_buffer;
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
static uint8_t *code_gen_ptr;

#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
static MemoryRegion io_mem_subpage_ram;

#endif

CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount = 0;

typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap */
    unsigned int code_write_count;
    uint8_t *code_bitmap;
#if defined(CONFIG_USER_ONLY)
    unsigned long flags;
#endif
} PageDesc;

/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses.  */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)

#define P_L2_LEVELS \
    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)

/* The bits remaining after N lower levels of page tables.  */
#define V_L1_BITS_REM \
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)

#if V_L1_BITS_REM < 4
#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS  V_L1_BITS_REM
#endif

#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)

#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)

uintptr_t qemu_real_host_page_size;
uintptr_t qemu_host_page_size;
uintptr_t qemu_host_page_mask;

/* This is a multi-level map on the virtual address space.
   The bottom level has pointers to PageDesc.  */
static void *l1_map[V_L1_SIZE];

#if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

static MemoryRegionSection *phys_sections;
static unsigned phys_sections_nb, phys_sections_nb_alloc;
static uint16_t phys_section_unassigned;
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

struct PhysPageEntry {
    uint16_t is_leaf : 1;
    /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

/* This is a multi-level map on the physical address space.
   The bottom level has pointers to MemoryRegionSections.  */
static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };

static void io_mem_init(void);
static void memory_map_init(void);

static MemoryRegion io_mem_watch;
#endif

/* statistics */
static int tb_flush_count;
static int tb_phys_invalidate_count;

#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD old_protect;
    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);
}
#else
static void map_exec(void *addr, long size)
{
    unsigned long start, end, page_size;

    page_size = getpagesize();
    start = (unsigned long)addr;
    start &= ~(page_size - 1);

    end = (unsigned long)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    mprotect((void *)start, end - start,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif

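/* Determine the host and target page sizes and, for BSD user-mode
   emulation, mark already-mapped host regions as reserved. */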
static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}

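/* Walk the multi-level l1_map for 'index' and return the matching
   PageDesc.  If 'alloc' is set, missing intermediate tables and the
   final PageDesc array are allocated on the way down. */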
static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

#if defined(CONFIG_USER_ONLY)
    /* We can't use g_malloc because it may recurse into a locked mutex. */
# define ALLOC(P, SIZE)                                 \
    do {                                                \
        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
    } while (0)
#else
# define ALLOC(P, SIZE) \
    do { P = g_malloc0(SIZE); } while (0)
#endif

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));

    /* Level 2..N-1.  */
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        void **p = *lp;

        if (p == NULL) {
            if (!alloc) {
                return NULL;
            }
            ALLOC(p, sizeof(void *) * L2_SIZE);
            *lp = p;
        }

        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }

    pd = *lp;
    if (pd == NULL) {
        if (!alloc) {
            return NULL;
        }
        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
        *lp = pd;
    }

#undef ALLOC

    return pd + (index & (L2_SIZE - 1));
}

static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(unsigned nodes)
{
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
        typedef PhysPageEntry Node[L2_SIZE];
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
                                      phys_map_nodes_nb + nodes);
        phys_map_nodes = g_renew(Node, phys_map_nodes,
                                 phys_map_nodes_nb_alloc);
    }
}

static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = phys_map_nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != phys_map_nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        phys_map_nodes[ret][i].is_leaf = 0;
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}

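/* Recursively descend the physical page map, allocating intermediate
   nodes as needed, and point the leaves covered by *index/*nb at 'leaf'. */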
static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
                                target_phys_addr_t *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

MemoryRegionSection *phys_page_find(target_phys_addr_t index)
{
    PhysPageEntry lp = phys_map;
    PhysPageEntry *p;
    int i;
    uint16_t s_index = phys_section_unassigned;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            goto not_found;
        }
        p = phys_map_nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }

    s_index = lp.ptr;
not_found:
    return &phys_sections[s_index];
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_ram && mr != &io_mem_rom
        && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

#define mmap_lock() do { } while(0)
#define mmap_unlock() do { } while(0)
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc will be used */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif

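/* Allocate the buffer that holds the translated (host) code, honouring
   per-host constraints on its placement and maximum size. */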
static void code_gen_alloc(unsigned long tb_size)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        start = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Keep the buffer no bigger than 16MB to branch between blocks */
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        start = (void *)0x90000000UL;
#endif
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        addr = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}

/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    code_gen_alloc(tb_size);
    code_gen_ptr = code_gen_buffer;
    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now.  */
    tcg_prologue_init(&tcg_ctx);
#endif
}

bool tcg_enabled(void)
{
    return code_gen_buffer != NULL;
}

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}

#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}

static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
#endif

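/* Return the CPU state with the given index, or NULL if none exists. */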
CPUArchState *qemu_get_cpu(int cpu)
{
    CPUArchState *env = first_cpu;

    while (env) {
        if (env->cpu_index == cpu)
            break;
        env = env->next_cpu;
    }

    return env;
}

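/* Register a new CPU: append it to the global list and assign it the
   next free cpu_index. */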
void cpu_exec_init(CPUArchState *env)
{
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    env->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}

/* Allocate a new translation block. Flush the translation buffer if
   too many translation blocks or too much generated code. */
static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    if (nb_tbs >= code_gen_max_blocks ||
        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
        return NULL;
    tb = &tbs[nb_tbs++];
    tb->pc = pc;
    tb->cflags = 0;
    return tb;
}

void tb_free(TranslationBlock *tb)
{
    /* In practice this is mostly used for single use temporary TBs.
       Ignore the hard cases and just back up if this TB happens to
       be the last one generated.  */
    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
        code_gen_ptr = tb->tc_ptr;
        nb_tbs--;
    }
}

static inline void invalidate_page_bitmap(PageDesc *p)
{
    if (p->code_bitmap) {
        g_free(p->code_bitmap);
        p->code_bitmap = NULL;
    }
    p->code_write_count = 0;
}

/* Set to NULL all the 'first_tb' fields in all PageDescs. */

static void page_flush_tb_1 (int level, void **lp)
{
    int i;

    if (*lp == NULL) {
        return;
    }
    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pd[i].first_tb = NULL;
            invalidate_page_bitmap(pd + i);
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            page_flush_tb_1 (level - 1, pp + i);
        }
    }
}

static void page_flush_tb(void)
{
    int i;
    for (i = 0; i < V_L1_SIZE; i++) {
        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
    }
}

/* flush all the translation blocks */
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUArchState *env1)
{
    CPUArchState *env;
#if defined(DEBUG_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(code_gen_ptr - code_gen_buffer),
           nb_tbs, nb_tbs > 0 ?
           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
#endif
    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
        cpu_abort(env1, "Internal error: code buffer overflow\n");

    nb_tbs = 0;

    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
    }

    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
    page_flush_tb();

    code_gen_ptr = code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    tb_flush_count++;
}

#ifdef DEBUG_TB_CHECK

static void tb_invalidate_check(target_ulong address)
{
    TranslationBlock *tb;
    int i;
    address &= TARGET_PAGE_MASK;
    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                  address >= tb->pc + tb->size)) {
                printf("ERROR invalidate: address=" TARGET_FMT_lx
                       " PC=%08lx size=%04x\n",
                       address, (long)tb->pc, tb->size);
            }
        }
    }
}

/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
    TranslationBlock *tb;
    int i, flags1, flags2;

    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            flags1 = page_get_flags(tb->pc);
            flags2 = page_get_flags(tb->pc + tb->size - 1);
            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
                       (long)tb->pc, tb->size, flags1, flags2);
            }
        }
    }
}

#endif

/* invalidate one TB */
static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
                             int next_offset)
{
    TranslationBlock *tb1;
    for(;;) {
        tb1 = *ptb;
        if (tb1 == tb) {
            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
            break;
        }
        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
    }
}

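/* Remove the TB from the list of TBs intersecting a given page. */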
static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    unsigned int n1;

    for(;;) {
        tb1 = *ptb;
        n1 = (uintptr_t)tb1 & 3;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        if (tb1 == tb) {
            *ptb = tb1->page_next[n1];
            break;
        }
        ptb = &tb1->page_next[n1];
    }
}

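/* Unlink this TB from the circular list of TBs jumping to the target
   of its jump 'n'. */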
static inline void tb_jmp_remove(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, **ptb;
    unsigned int n1;

    ptb = &tb->jmp_next[n];
    tb1 = *ptb;
    if (tb1) {
        /* find tb(n) in circular list */
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            if (n1 == 2) {
                ptb = &tb1->jmp_first;
            } else {
                ptb = &tb1->jmp_next[n1];
            }
        }
        /* now we can suppress tb(n) from the list */
        *ptb = tb->jmp_next[n];

        tb->jmp_next[n] = NULL;
    }
}

/* reset the jump entry 'n' of a TB so that it is not chained to
   another TB */
static inline void tb_reset_jump(TranslationBlock *tb, int n)
{
    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
}

void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUArchState *env;
    PageDesc *p;
    unsigned int h, n1;
    tb_page_addr_t phys_pc;
    TranslationBlock *tb1, *tb2;

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_remove(&tb_phys_hash[h], tb,
              offsetof(TranslationBlock, phys_hash_next));

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;

    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->tb_jmp_cache[h] == tb)
            env->tb_jmp_cache[h] = NULL;
    }

    /* suppress this TB from the two jump lists */
    tb_jmp_remove(tb, 0);
    tb_jmp_remove(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb1 = tb->jmp_first;
    for(;;) {
        n1 = (uintptr_t)tb1 & 3;
        if (n1 == 2)
            break;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        tb2 = tb1->jmp_next[n1];
        tb_reset_jump(tb1, n1);
        tb1->jmp_next[n1] = NULL;
        tb1 = tb2;
    }
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
}

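/* Set bits [start, start + len) in the bitmap 'tab'. */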
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}

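/* Build a bitmap of the bytes in the page that are covered by a TB,
   used to filter self-modifying-code invalidations. */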
static void build_page_bitmap(PageDesc *p)
{
    int n, tb_start, tb_end;
    TranslationBlock *tb;

    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);

    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->pc & ~TARGET_PAGE_MASK;
            tb_end = tb_start + tb->size;
            if (tb_end > TARGET_PAGE_SIZE)
                tb_end = TARGET_PAGE_SIZE;
        } else {
            tb_start = 0;
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
        tb = tb->page_next[n];
    }
}

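/* Translate the guest code at 'pc' into a new TB and link it into the
   physical page tables; flushes the whole cache if the buffer is full. */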
TranslationBlock *tb_gen_code(CPUArchState *env,
                              target_ulong pc, target_ulong cs_base,
                              int flags, int cflags)
{
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    int code_gen_size;

    phys_pc = get_page_addr_code(env, pc);
    tb = tb_alloc(pc);
    if (!tb) {
        /* flush must be done */
        tb_flush(env);
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        /* Don't forget to invalidate previous TB info.  */
        tb_invalidated_flag = 1;
    }
    tc_ptr = code_gen_ptr;
    tb->tc_ptr = tc_ptr;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    cpu_gen_code(env, tb, &code_gen_size);
    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    tb_link_page(tb, phys_pc, phys_page2);
    return tb;
}

/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end may refer to *different* physical pages.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
                              int is_cpu_write_access)
{
    while (start < end) {
        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
        start &= TARGET_PAGE_MASK;
        start += TARGET_PAGE_SIZE;
    }
}

/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end must refer to the *same* physical page.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
{
    TranslationBlock *tb, *tb_next, *saved_tb;
    CPUArchState *env = cpu_single_env;
    tb_page_addr_t tb_start, tb_end;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    int current_tb_not_found = is_cpu_write_access;
    TranslationBlock *current_tb = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
        is_cpu_write_access) {
        /* build code bitmap */
        build_page_bitmap(p);
    }

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all the code */
    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        tb_next = tb->page_next[n];
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = 0;
                current_tb = NULL;
                if (env->mem_io_pc) {
                    /* now we have a real cpu fault */
                    current_tb = tb_find_pc(env->mem_io_pc);
                }
            }
            if (current_tb == tb &&
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                its execution. We could be more precise by checking
                that the modification is after the current PC, but it
                would require a specialized function to partially
                restore the CPU state */

                current_tb_modified = 1;
                cpu_restore_state(current_tb, env, env->mem_io_pc);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            /* we need to do that to handle the case where a signal
               occurs while doing tb_phys_invalidate() */
            saved_tb = NULL;
            if (env) {
                saved_tb = env->current_tb;
                env->current_tb = NULL;
            }
            tb_phys_invalidate(tb, -1);
            if (env) {
                env->current_tb = saved_tb;
                if (env->interrupt_request && env->current_tb)
                    cpu_interrupt(env, env->interrupt_request);
            }
        }
        tb = tb_next;
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        if (is_cpu_write_access) {
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
        }
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, NULL);
    }
#endif
}

/* len must be <= 8 and start must be a multiple of len */
static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
    PageDesc *p;
    int offset, b;
#if 0
    if (1) {
        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
                  cpu_single_env->mem_io_vaddr, len,
                  cpu_single_env->eip,
                  cpu_single_env->eip +
                  (intptr_t)cpu_single_env->segs[R_CS].base);
    }
#endif
    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (p->code_bitmap) {
        offset = start & ~TARGET_PAGE_MASK;
        b = p->code_bitmap[offset >> 3] >> (offset & 7);
        if (b & ((1 << len) - 1))
            goto do_invalidate;
    } else {
    do_invalidate:
        tb_invalidate_phys_page_range(start, start + len, 1);
    }
}

#if !defined(CONFIG_SOFTMMU)
static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                    uintptr_t pc, void *puc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUArchState *env = cpu_single_env;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p)
        return;
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
        current_tb = tb_find_pc(pc);
    }
#endif
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                   its execution. We could be more precise by checking
                   that the modification is after the current PC, but it
                   would require a specialized function to partially
                   restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state(current_tb, env, pc);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        tb_phys_invalidate(tb, addr);
        tb = tb->page_next[n];
    }
    p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, puc);
    }
#endif
}
#endif

/* add the tb in the target page and protect it if necessary */
static inline void tb_alloc_page(TranslationBlock *tb,
                                 unsigned int n, tb_page_addr_t page_addr)
{
    PageDesc *p;
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    page_already_protected = p->first_tb != NULL;
#endif
    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
    invalidate_page_bitmap(p);

#if defined(TARGET_HAS_SMC) || 1

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find (addr >> TARGET_PAGE_BITS);
            if (!p2)
                continue;
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
#ifdef DEBUG_TB_INVALIDATE
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
               page_addr);
#endif
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif

#endif /* TARGET_HAS_SMC */
}

/* add a new TB and link it to the physical page tables. phys_page2 is
   (-1) to indicate that only one page contains the TB. */
void tb_link_page(TranslationBlock *tb,
                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
{
    unsigned int h;
    TranslationBlock **ptb;

    /* Grab the mmap lock to stop another thread invalidating this TB
       before we are done.  */
    mmap_lock();
    /* add in the physical hash table */
    h = tb_phys_hash_func(phys_pc);
    ptb = &tb_phys_hash[h];
    tb->phys_hash_next = *ptb;
    *ptb = tb;

    /* add in the page list */
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (phys_page2 != -1)
        tb_alloc_page(tb, 1, phys_page2);
    else
        tb->page_addr[1] = -1;

    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
    tb->jmp_next[0] = NULL;
    tb->jmp_next[1] = NULL;

    /* init original jump addresses */
    if (tb->tb_next_offset[0] != 0xffff)
        tb_reset_jump(tb, 0);
    if (tb->tb_next_offset[1] != 0xffff)
        tb_reset_jump(tb, 1);

#ifdef DEBUG_TB_CHECK
    tb_page_check();
#endif
    mmap_unlock();
}

/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
   tb[1].tc_ptr. Return NULL if not found */
TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
{
    int m_min, m_max, m;
    uintptr_t v;
    TranslationBlock *tb;

    if (nb_tbs <= 0)
        return NULL;
    if (tc_ptr < (uintptr_t)code_gen_buffer ||
        tc_ptr >= (uintptr_t)code_gen_ptr) {
        return NULL;
    }
    /* binary search (cf Knuth) */
    m_min = 0;
    m_max = nb_tbs - 1;
    while (m_min <= m_max) {
        m = (m_min + m_max) >> 1;
        tb = &tbs[m];
        v = (uintptr_t)tb->tc_ptr;
        if (v == tc_ptr)
            return tb;
        else if (tc_ptr < v) {
            m_max = m - 1;
        } else {
            m_min = m + 1;
        }
    }
    return &tbs[m_max];
}

static void tb_reset_jump_recursive(TranslationBlock *tb);

static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, *tb_next, **ptb;
    unsigned int n1;

    tb1 = tb->jmp_next[n];
    if (tb1 != NULL) {
        /* find head of list */
        for(;;) {
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == 2)
                break;
            tb1 = tb1->jmp_next[n1];
        }
        /* we are now sure that tb jumps to tb1 */
        tb_next = tb1;

        /* remove tb from the jmp_first list */
        ptb = &tb_next->jmp_first;
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            ptb = &tb1->jmp_next[n1];
        }
        *ptb = tb->jmp_next[n];
        tb->jmp_next[n] = NULL;

        /* suppress the jump to next tb in generated code */
        tb_reset_jump(tb, n);

        /* suppress jumps in the tb on which we could have jumped */
        tb_reset_jump_recursive(tb_next);
    }
}

static void tb_reset_jump_recursive(TranslationBlock *tb)
{
    tb_reset_jump_recursive2(tb, 0);
    tb_reset_jump_recursive2(tb, 1);
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
void tb_invalidate_phys_addr(target_phys_addr_t addr)
{
    ram_addr_t ram_addr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!(memory_region_is_ram(section->mr)
          || (section->mr->rom_device && section->mr->readable))) {
        return;
    }
    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        + memory_region_section_addr(section, addr);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}

static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif

/* Add a breakpoint.  */
1578
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1579
                          CPUBreakpoint **breakpoint)
1580
{
1581
#if defined(TARGET_HAS_ICE)
1582
    CPUBreakpoint *bp;
1583

    
1584
    bp = g_malloc(sizeof(*bp));
1585

    
1586
    bp->pc = pc;
1587
    bp->flags = flags;
1588

    
1589
    /* keep all GDB-injected breakpoints in front */
1590
    if (flags & BP_GDB)
1591
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1592
    else
1593
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1594

    
1595
    breakpoint_invalidate(env, pc);
1596

    
1597
    if (breakpoint)
1598
        *breakpoint = bp;
1599
    return 0;
1600
#else
1601
    return -ENOSYS;
1602
#endif
1603
}
1604

    
1605
/* Remove a specific breakpoint.  */
1606
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1607
{
1608
#if defined(TARGET_HAS_ICE)
1609
    CPUBreakpoint *bp;
1610

    
1611
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1612
        if (bp->pc == pc && bp->flags == flags) {
1613
            cpu_breakpoint_remove_by_ref(env, bp);
1614
            return 0;
1615
        }
1616
    }
1617
    return -ENOENT;
1618
#else
1619
    return -ENOSYS;
1620
#endif
1621
}
1622

    
1623
/* Remove a specific breakpoint by reference.  */
1624
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1625
{
1626
#if defined(TARGET_HAS_ICE)
1627
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1628

    
1629
    breakpoint_invalidate(env, breakpoint->pc);
1630

    
1631
    g_free(breakpoint);
1632
#endif
1633
}
1634

    
1635
/* Remove all matching breakpoints. */
1636
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1637
{
1638
#if defined(TARGET_HAS_ICE)
1639
    CPUBreakpoint *bp, *next;
1640

    
1641
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1642
        if (bp->flags & mask)
1643
            cpu_breakpoint_remove_by_ref(env, bp);
1644
    }
1645
#endif
1646
}
1647

    
1648
/* enable or disable single step mode. EXCP_DEBUG is returned by the
1649
   CPU loop after each instruction */
1650
void cpu_single_step(CPUArchState *env, int enabled)
1651
{
1652
#if defined(TARGET_HAS_ICE)
1653
    if (env->singlestep_enabled != enabled) {
1654
        env->singlestep_enabled = enabled;
1655
        if (kvm_enabled())
1656
            kvm_update_guest_debug(env, 0);
1657
        else {
1658
            /* must flush all the translated code to avoid inconsistencies */
1659
            /* XXX: only flush what is necessary */
1660
            tb_flush(env);
1661
        }
1662
    }
1663
#endif
1664
}
1665

    
1666
static void cpu_unlink_tb(CPUArchState *env)
1667
{
1668
    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
1669
       problem and hope the cpu will stop of its own accord.  For userspace
1670
       emulation this often isn't actually as bad as it sounds.  Often
1671
       signals are used primarily to interrupt blocking syscalls.  */
1672
    TranslationBlock *tb;
1673
    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1674

    
1675
    spin_lock(&interrupt_lock);
1676
    tb = env->current_tb;
1677
    /* if the cpu is currently executing code, we must unlink it and
1678
       all the potentially executing TB */
1679
    if (tb) {
1680
        env->current_tb = NULL;
1681
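        /* Breaking the chained direct jumps between TBs forces execution
           back into the main CPU loop, where interrupt_request and
           exit_request are examined again. */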
        tb_reset_jump_recursive(tb);
1682
    }
1683
    spin_unlock(&interrupt_lock);
1684
}
1685

    
1686
#ifndef CONFIG_USER_ONLY
1687
/* mask must never be zero, except for A20 change call */
1688
static void tcg_handle_interrupt(CPUArchState *env, int mask)
1689
{
1690
    int old_mask;
1691

    
1692
    old_mask = env->interrupt_request;
1693
    env->interrupt_request |= mask;
1694

    
1695
    /*
1696
     * If called from iothread context, wake the target cpu in
1697
     * case it's halted.
1698
     */
1699
    if (!qemu_cpu_is_self(env)) {
1700
        qemu_cpu_kick(env);
1701
        return;
1702
    }
1703

    
1704
    if (use_icount) {
1705
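        /* Drive the combined 32-bit icount decrementer negative so that
           translated code stops at its next check and the pending
           interrupt is noticed promptly. */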
        env->icount_decr.u16.high = 0xffff;
1706
        if (!can_do_io(env)
1707
            && (mask & ~old_mask) != 0) {
1708
            cpu_abort(env, "Raised interrupt while not in I/O function");
1709
        }
1710
    } else {
1711
        cpu_unlink_tb(env);
1712
    }
1713
}
1714

    
1715
CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1716

    
1717
#else /* CONFIG_USER_ONLY */
1718

    
1719
void cpu_interrupt(CPUArchState *env, int mask)
1720
{
1721
    env->interrupt_request |= mask;
1722
    cpu_unlink_tb(env);
1723
}
1724
#endif /* CONFIG_USER_ONLY */
1725

    
1726
void cpu_reset_interrupt(CPUArchState *env, int mask)
1727
{
1728
    env->interrupt_request &= ~mask;
1729
}
1730

    
1731
void cpu_exit(CPUArchState *env)
1732
{
1733
    env->exit_request = 1;
1734
    cpu_unlink_tb(env);
1735
}
1736

    
1737
void cpu_abort(CPUArchState *env, const char *fmt, ...)
1738
{
1739
    va_list ap;
1740
    va_list ap2;
1741

    
1742
    va_start(ap, fmt);
1743
    va_copy(ap2, ap);
1744
    fprintf(stderr, "qemu: fatal: ");
1745
    vfprintf(stderr, fmt, ap);
1746
    fprintf(stderr, "\n");
1747
#ifdef TARGET_I386
1748
    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1749
#else
1750
    cpu_dump_state(env, stderr, fprintf, 0);
1751
#endif
1752
    if (qemu_log_enabled()) {
1753
        qemu_log("qemu: fatal: ");
1754
        qemu_log_vprintf(fmt, ap2);
1755
        qemu_log("\n");
1756
#ifdef TARGET_I386
1757
        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1758
#else
1759
        log_cpu_state(env, 0);
1760
#endif
1761
        qemu_log_flush();
1762
        qemu_log_close();
1763
    }
1764
    va_end(ap2);
1765
    va_end(ap);
1766
#if defined(CONFIG_USER_ONLY)
1767
    {
1768
        struct sigaction act;
1769
        sigfillset(&act.sa_mask);
1770
        act.sa_handler = SIG_DFL;
1771
        sigaction(SIGABRT, &act, NULL);
1772
    }
1773
#endif
1774
    abort();
1775
}
1776

    
1777
CPUArchState *cpu_copy(CPUArchState *env)
1778
{
1779
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
1780
    CPUArchState *next_cpu = new_env->next_cpu;
1781
    int cpu_index = new_env->cpu_index;
1782
#if defined(TARGET_HAS_ICE)
1783
    CPUBreakpoint *bp;
1784
    CPUWatchpoint *wp;
1785
#endif
1786

    
1787
    memcpy(new_env, env, sizeof(CPUArchState));
1788

    
1789
    /* Preserve chaining and index. */
1790
    new_env->next_cpu = next_cpu;
1791
    new_env->cpu_index = cpu_index;
1792

    
1793
    /* Clone all break/watchpoints.
1794
       Note: Once we support ptrace with hw-debug register access, make sure
1795
       BP_CPU break/watchpoints are handled correctly on clone. */
1796
    QTAILQ_INIT(&env->breakpoints);
1797
    QTAILQ_INIT(&env->watchpoints);
1798
#if defined(TARGET_HAS_ICE)
1799
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1800
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1801
    }
1802
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1803
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1804
                              wp->flags, NULL);
1805
    }
1806
#endif
1807

    
1808
    return new_env;
1809
}
1810

    
1811
#if !defined(CONFIG_USER_ONLY)
1812
void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1813
{
1814
    unsigned int i;
1815

    
1816
    /* Discard jump cache entries for any tb which might potentially
1817
       overlap the flushed page.  */
1818
    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1819
    memset (&env->tb_jmp_cache[i], 0, 
1820
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1821

    
1822
    i = tb_jmp_cache_hash_page(addr);
1823
    memset (&env->tb_jmp_cache[i], 0, 
1824
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1825
}
1826

    
1827
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1828
                                      uintptr_t length)
1829
{
1830
    uintptr_t start1;
1831

    
1832
    /* we modify the TLB cache so that the dirty bit will be set again
1833
       when accessing the range */
1834
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1835
    /* Check that we don't span multiple blocks - this breaks the
1836
       address comparisons below.  */
1837
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1838
            != (end - 1) - start) {
1839
        abort();
1840
    }
1841
    cpu_tlb_reset_dirty_all(start1, length);
1842

    
1843
}
1844

    
1845
/* Note: start and end must be within the same ram block.  */
1846
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1847
                                     int dirty_flags)
1848
{
1849
    uintptr_t length;
1850

    
1851
    start &= TARGET_PAGE_MASK;
1852
    end = TARGET_PAGE_ALIGN(end);
1853

    
1854
    length = end - start;
1855
    if (length == 0)
1856
        return;
1857
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1858

    
1859
    if (tcg_enabled()) {
1860
        tlb_reset_dirty_range_all(start, end, length);
1861
    }
1862
}
1863

    
1864
int cpu_physical_memory_set_dirty_tracking(int enable)
1865
{
1866
    int ret = 0;
1867
    in_migration = enable;
1868
    return ret;
1869
}
1870

    
1871
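/* The returned iotlb value encodes where the access should be routed: for
 * RAM-backed sections it is the page's ram address OR'ed with the notdirty
 * (writable) or rom (read-only) section index, otherwise it is the section's
 * index into phys_sections plus the offset within the region.  Pages covered
 * by a watchpoint are redirected to the watch section and marked TLB_MMIO so
 * that every access takes the slow path.
 */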
target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1872
                                                   MemoryRegionSection *section,
1873
                                                   target_ulong vaddr,
1874
                                                   target_phys_addr_t paddr,
1875
                                                   int prot,
1876
                                                   target_ulong *address)
1877
{
1878
    target_phys_addr_t iotlb;
1879
    CPUWatchpoint *wp;
1880

    
1881
    if (memory_region_is_ram(section->mr)) {
1882
        /* Normal RAM.  */
1883
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1884
            + memory_region_section_addr(section, paddr);
1885
        if (!section->readonly) {
1886
            iotlb |= phys_section_notdirty;
1887
        } else {
1888
            iotlb |= phys_section_rom;
1889
        }
1890
    } else {
1891
        /* IO handlers are currently passed a physical address.
1892
           It would be nice to pass an offset from the base address
1893
           of that region.  This would avoid having to special case RAM,
1894
           and avoid full address decoding in every device.
1895
           We can't use the high bits of pd for this because
1896
           IO_MEM_ROMD uses these as a ram address.  */
1897
        iotlb = section - phys_sections;
1898
        iotlb += memory_region_section_addr(section, paddr);
1899
    }
1900

    
1901
    /* Make accesses to pages with watchpoints go via the
1902
       watchpoint trap routines.  */
1903
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1904
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1905
            /* Avoid trapping reads of pages with a write breakpoint. */
1906
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1907
                iotlb = phys_section_watch + paddr;
1908
                *address |= TLB_MMIO;
1909
                break;
1910
            }
1911
        }
1912
    }
1913

    
1914
    return iotlb;
1915
}
1916

    
1917
#else
1918
/*
1919
 * Walks guest process memory "regions" one by one
1920
 * and calls callback function 'fn' for each region.
1921
 */
1922

    
1923
struct walk_memory_regions_data
1924
{
1925
    walk_memory_regions_fn fn;
1926
    void *priv;
1927
    uintptr_t start;
1928
    int prot;
1929
};
1930

    
1931
static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1932
                                   abi_ulong end, int new_prot)
1933
{
1934
    if (data->start != -1ul) {
1935
        int rc = data->fn(data->priv, data->start, end, data->prot);
1936
        if (rc != 0) {
1937
            return rc;
1938
        }
1939
    }
1940

    
1941
    data->start = (new_prot ? end : -1ul);
1942
    data->prot = new_prot;
1943

    
1944
    return 0;
1945
}
1946

    
1947
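/* Recursive helper: walk one node of the l1_map radix tree.  At level 0 the
 * node is an array of PageDesc entries and every change of protection flags
 * closes the current region via walk_memory_regions_end(); at higher levels
 * it recurses into the L2_SIZE children, rebasing the guest address as it
 * descends.
 */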
static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1948
                                 abi_ulong base, int level, void **lp)
1949
{
1950
    abi_ulong pa;
1951
    int i, rc;
1952

    
1953
    if (*lp == NULL) {
1954
        return walk_memory_regions_end(data, base, 0);
1955
    }
1956

    
1957
    if (level == 0) {
1958
        PageDesc *pd = *lp;
1959
        for (i = 0; i < L2_SIZE; ++i) {
1960
            int prot = pd[i].flags;
1961

    
1962
            pa = base | (i << TARGET_PAGE_BITS);
1963
            if (prot != data->prot) {
1964
                rc = walk_memory_regions_end(data, pa, prot);
1965
                if (rc != 0) {
1966
                    return rc;
1967
                }
1968
            }
1969
        }
1970
    } else {
1971
        void **pp = *lp;
1972
        for (i = 0; i < L2_SIZE; ++i) {
1973
            pa = base | ((abi_ulong)i <<
1974
                (TARGET_PAGE_BITS + L2_BITS * level));
1975
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1976
            if (rc != 0) {
1977
                return rc;
1978
            }
1979
        }
1980
    }
1981

    
1982
    return 0;
1983
}
1984

    
1985
int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1986
{
1987
    struct walk_memory_regions_data data;
1988
    uintptr_t i;
1989

    
1990
    data.fn = fn;
1991
    data.priv = priv;
1992
    data.start = -1ul;
1993
    data.prot = 0;
1994

    
1995
    for (i = 0; i < V_L1_SIZE; i++) {
1996
        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1997
                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1998
        if (rc != 0) {
1999
            return rc;
2000
        }
2001
    }
2002

    
2003
    return walk_memory_regions_end(&data, 0, 0);
2004
}
2005

    
2006
static int dump_region(void *priv, abi_ulong start,
2007
    abi_ulong end, unsigned long prot)
2008
{
2009
    FILE *f = (FILE *)priv;
2010

    
2011
    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2012
        " "TARGET_ABI_FMT_lx" %c%c%c\n",
2013
        start, end, end - start,
2014
        ((prot & PAGE_READ) ? 'r' : '-'),
2015
        ((prot & PAGE_WRITE) ? 'w' : '-'),
2016
        ((prot & PAGE_EXEC) ? 'x' : '-'));
2017

    
2018
    return (0);
2019
}
2020

    
2021
/* dump memory mappings */
2022
void page_dump(FILE *f)
2023
{
2024
    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2025
            "start", "end", "size", "prot");
2026
    walk_memory_regions(f, dump_region);
2027
}
2028

    
2029
int page_get_flags(target_ulong address)
2030
{
2031
    PageDesc *p;
2032

    
2033
    p = page_find(address >> TARGET_PAGE_BITS);
2034
    if (!p)
2035
        return 0;
2036
    return p->flags;
2037
}
2038

    
2039
/* Modify the flags of a page and invalidate the code if necessary.
2040
   The flag PAGE_WRITE_ORG is positioned automatically depending
2041
   on PAGE_WRITE.  The mmap_lock should already be held.  */
2042
void page_set_flags(target_ulong start, target_ulong end, int flags)
2043
{
2044
    target_ulong addr, len;
2045

    
2046
    /* This function should never be called with addresses outside the
2047
       guest address space.  If this assert fires, it probably indicates
2048
       a missing call to h2g_valid.  */
2049
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2050
    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2051
#endif
2052
    assert(start < end);
2053

    
2054
    start = start & TARGET_PAGE_MASK;
2055
    end = TARGET_PAGE_ALIGN(end);
2056

    
2057
    if (flags & PAGE_WRITE) {
2058
        flags |= PAGE_WRITE_ORG;
2059
    }
2060

    
2061
    for (addr = start, len = end - start;
2062
         len != 0;
2063
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2064
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2065

    
2066
        /* If the write protection bit is set, then we invalidate
2067
           the code inside.  */
2068
        if (!(p->flags & PAGE_WRITE) &&
2069
            (flags & PAGE_WRITE) &&
2070
            p->first_tb) {
2071
            tb_invalidate_phys_page(addr, 0, NULL);
2072
        }
2073
        p->flags = flags;
2074
    }
2075
}
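
/* Illustrative sketch, not in the original source: the user-mode mmap path
 * typically publishes a freshly mapped, writable region along these lines,
 * where `start`, `len` and `prot` stand in for the caller's values:
 *
 *     page_set_flags(start, start + len, prot | PAGE_VALID);
 */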
2076

    
2077
int page_check_range(target_ulong start, target_ulong len, int flags)
2078
{
2079
    PageDesc *p;
2080
    target_ulong end;
2081
    target_ulong addr;
2082

    
2083
    /* This function should never be called with addresses outside the
2084
       guest address space.  If this assert fires, it probably indicates
2085
       a missing call to h2g_valid.  */
2086
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2087
    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2088
#endif
2089

    
2090
    if (len == 0) {
2091
        return 0;
2092
    }
2093
    if (start + len - 1 < start) {
2094
        /* We've wrapped around.  */
2095
        return -1;
2096
    }
2097

    
2098
    end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
2099
    start = start & TARGET_PAGE_MASK;
2100

    
2101
    for (addr = start, len = end - start;
2102
         len != 0;
2103
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2104
        p = page_find(addr >> TARGET_PAGE_BITS);
2105
        if( !p )
2106
            return -1;
2107
        if( !(p->flags & PAGE_VALID) )
2108
            return -1;
2109

    
2110
        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2111
            return -1;
2112
        if (flags & PAGE_WRITE) {
2113
            if (!(p->flags & PAGE_WRITE_ORG))
2114
                return -1;
2115
            /* unprotect the page if it was put read-only because it
2116
               contains translated code */
2117
            if (!(p->flags & PAGE_WRITE)) {
2118
                if (!page_unprotect(addr, 0, NULL))
2119
                    return -1;
2120
            }
2121
            return 0;
2122
        }
2123
    }
2124
    return 0;
2125
}
2126

    
2127
/* called from signal handler: invalidate the code and unprotect the
2128
   page. Return TRUE if the fault was successfully handled. */
2129
int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2130
{
2131
    unsigned int prot;
2132
    PageDesc *p;
2133
    target_ulong host_start, host_end, addr;
2134

    
2135
    /* Technically this isn't safe inside a signal handler.  However we
2136
       know this only ever happens in a synchronous SEGV handler, so in
2137
       practice it seems to be ok.  */
2138
    mmap_lock();
2139

    
2140
    p = page_find(address >> TARGET_PAGE_BITS);
2141
    if (!p) {
2142
        mmap_unlock();
2143
        return 0;
2144
    }
2145

    
2146
    /* if the page was really writable, then we change its
2147
       protection back to writable */
2148
    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2149
        host_start = address & qemu_host_page_mask;
2150
        host_end = host_start + qemu_host_page_size;
2151

    
2152
        prot = 0;
2153
        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2154
            p = page_find(addr >> TARGET_PAGE_BITS);
2155
            p->flags |= PAGE_WRITE;
2156
            prot |= p->flags;
2157

    
2158
            /* and since the content will be modified, we must invalidate
2159
               the corresponding translated code. */
2160
            tb_invalidate_phys_page(addr, pc, puc);
2161
#ifdef DEBUG_TB_CHECK
2162
            tb_invalidate_check(addr);
2163
#endif
2164
        }
2165
        mprotect((void *)g2h(host_start), qemu_host_page_size,
2166
                 prot & PAGE_BITS);
2167

    
2168
        mmap_unlock();
2169
        return 1;
2170
    }
2171
    mmap_unlock();
2172
    return 0;
2173
}
2174
#endif /* defined(CONFIG_USER_ONLY) */
2175

    
2176
#if !defined(CONFIG_USER_ONLY)
2177

    
2178
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2179
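/* A subpage covers a single target page whose contents are split between
 * several memory region sections; sub_section[] maps each byte offset within
 * the page (see SUBPAGE_IDX) to an index into phys_sections.
 */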
typedef struct subpage_t {
2180
    MemoryRegion iomem;
2181
    target_phys_addr_t base;
2182
    uint16_t sub_section[TARGET_PAGE_SIZE];
2183
} subpage_t;
2184

    
2185
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2186
                             uint16_t section);
2187
static subpage_t *subpage_init(target_phys_addr_t base);
2188
static void destroy_page_desc(uint16_t section_index)
2189
{
2190
    MemoryRegionSection *section = &phys_sections[section_index];
2191
    MemoryRegion *mr = section->mr;
2192

    
2193
    if (mr->subpage) {
2194
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
2195
        memory_region_destroy(&subpage->iomem);
2196
        g_free(subpage);
2197
    }
2198
}
2199

    
2200
static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2201
{
2202
    unsigned i;
2203
    PhysPageEntry *p;
2204

    
2205
    if (lp->ptr == PHYS_MAP_NODE_NIL) {
2206
        return;
2207
    }
2208

    
2209
    p = phys_map_nodes[lp->ptr];
2210
    for (i = 0; i < L2_SIZE; ++i) {
2211
        if (!p[i].is_leaf) {
2212
            destroy_l2_mapping(&p[i], level - 1);
2213
        } else {
2214
            destroy_page_desc(p[i].ptr);
2215
        }
2216
    }
2217
    lp->is_leaf = 0;
2218
    lp->ptr = PHYS_MAP_NODE_NIL;
2219
}
2220

    
2221
static void destroy_all_mappings(void)
2222
{
2223
    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2224
    phys_map_nodes_reset();
2225
}
2226

    
2227
static uint16_t phys_section_add(MemoryRegionSection *section)
2228
{
2229
    if (phys_sections_nb == phys_sections_nb_alloc) {
2230
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2231
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
2232
                                phys_sections_nb_alloc);
2233
    }
2234
    phys_sections[phys_sections_nb] = *section;
2235
    return phys_sections_nb++;
2236
}
2237

    
2238
static void phys_sections_clear(void)
2239
{
2240
    phys_sections_nb = 0;
2241
}
2242

    
2243
static void register_subpage(MemoryRegionSection *section)
2244
{
2245
    subpage_t *subpage;
2246
    target_phys_addr_t base = section->offset_within_address_space
2247
        & TARGET_PAGE_MASK;
2248
    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2249
    MemoryRegionSection subsection = {
2250
        .offset_within_address_space = base,
2251
        .size = TARGET_PAGE_SIZE,
2252
    };
2253
    target_phys_addr_t start, end;
2254

    
2255
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2256

    
2257
    if (!(existing->mr->subpage)) {
2258
        subpage = subpage_init(base);
2259
        subsection.mr = &subpage->iomem;
2260
        phys_page_set(base >> TARGET_PAGE_BITS, 1,
2261
                      phys_section_add(&subsection));
2262
    } else {
2263
        subpage = container_of(existing->mr, subpage_t, iomem);
2264
    }
2265
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2266
    end = start + section->size - 1;
2267
    subpage_register(subpage, start, end, phys_section_add(section));
2268
}
2269

    
2270

    
2271
static void register_multipage(MemoryRegionSection *section)
2272
{
2273
    target_phys_addr_t start_addr = section->offset_within_address_space;
2274
    ram_addr_t size = section->size;
2275
    target_phys_addr_t addr;
2276
    uint16_t section_index = phys_section_add(section);
2277

    
2278
    assert(size);
2279

    
2280
    addr = start_addr;
2281
    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2282
                  section_index);
2283
}
2284

    
2285
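/* Split the incoming section into an unaligned head, a run of whole target
 * pages, and an unaligned tail.  Head and tail go through the subpage
 * machinery; the middle is registered as a multipage entry when its offset
 * within the region is page aligned, otherwise page by page as subpages.
 */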
void cpu_register_physical_memory_log(MemoryRegionSection *section,
2286
                                      bool readonly)
2287
{
2288
    MemoryRegionSection now = *section, remain = *section;
2289

    
2290
    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2291
        || (now.size < TARGET_PAGE_SIZE)) {
2292
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2293
                       - now.offset_within_address_space,
2294
                       now.size);
2295
        register_subpage(&now);
2296
        remain.size -= now.size;
2297
        remain.offset_within_address_space += now.size;
2298
        remain.offset_within_region += now.size;
2299
    }
2300
    while (remain.size >= TARGET_PAGE_SIZE) {
2301
        now = remain;
2302
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2303
            now.size = TARGET_PAGE_SIZE;
2304
            register_subpage(&now);
2305
        } else {
2306
            now.size &= TARGET_PAGE_MASK;
2307
            register_multipage(&now);
2308
        }
2309
        remain.size -= now.size;
2310
        remain.offset_within_address_space += now.size;
2311
        remain.offset_within_region += now.size;
2312
    }
2313
    now = remain;
2314
    if (now.size) {
2315
        register_subpage(&now);
2316
    }
2317
}
2318

    
2319

    
2320
void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2321
{
2322
    if (kvm_enabled())
2323
        kvm_coalesce_mmio_region(addr, size);
2324
}
2325

    
2326
void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2327
{
2328
    if (kvm_enabled())
2329
        kvm_uncoalesce_mmio_region(addr, size);
2330
}
2331

    
2332
void qemu_flush_coalesced_mmio_buffer(void)
2333
{
2334
    if (kvm_enabled())
2335
        kvm_flush_coalesced_mmio_buffer();
2336
}
2337

    
2338
#if defined(__linux__) && !defined(TARGET_S390X)
2339

    
2340
#include <sys/vfs.h>
2341

    
2342
#define HUGETLBFS_MAGIC       0x958458f6
2343

    
2344
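/* On hugetlbfs, statfs() reports the huge page size in f_bsize, so this
 * returns the granularity to round allocations to and warns when the given
 * path is not actually on hugetlbfs.
 */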
static long gethugepagesize(const char *path)
2345
{
2346
    struct statfs fs;
2347
    int ret;
2348

    
2349
    do {
2350
        ret = statfs(path, &fs);
2351
    } while (ret != 0 && errno == EINTR);
2352

    
2353
    if (ret != 0) {
2354
        perror(path);
2355
        return 0;
2356
    }
2357

    
2358
    if (fs.f_type != HUGETLBFS_MAGIC)
2359
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2360

    
2361
    return fs.f_bsize;
2362
}
2363

    
2364
static void *file_ram_alloc(RAMBlock *block,
2365
                            ram_addr_t memory,
2366
                            const char *path)
2367
{
2368
    char *filename;
2369
    void *area;
2370
    int fd;
2371
#ifdef MAP_POPULATE
2372
    int flags;
2373
#endif
2374
    unsigned long hpagesize;
2375

    
2376
    hpagesize = gethugepagesize(path);
2377
    if (!hpagesize) {
2378
        return NULL;
2379
    }
2380

    
2381
    if (memory < hpagesize) {
2382
        return NULL;
2383
    }
2384

    
2385
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
2386
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2387
        return NULL;
2388
    }
2389

    
2390
    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2391
        return NULL;
2392
    }
2393

    
2394
    fd = mkstemp(filename);
2395
    if (fd < 0) {
2396
        perror("unable to create backing store for hugepages");
2397
        free(filename);
2398
        return NULL;
2399
    }
2400
    unlink(filename);
2401
    free(filename);
2402

    
2403
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
2404

    
2405
    /*
2406
     * ftruncate is not supported by hugetlbfs in older
2407
     * hosts, so don't bother bailing out on errors.
2408
     * If anything goes wrong with it under other filesystems,
2409
     * mmap will fail.
2410
     */
2411
    if (ftruncate(fd, memory))
2412
        perror("ftruncate");
2413

    
2414
#ifdef MAP_POPULATE
2415
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2416
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
2417
     * to sidestep this quirk.
2418
     */
2419
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2420
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2421
#else
2422
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2423
#endif
2424
    if (area == MAP_FAILED) {
2425
        perror("file_ram_alloc: can't mmap RAM pages");
2426
        close(fd);
2427
        return (NULL);
2428
    }
2429
    block->fd = fd;
2430
    return area;
2431
}
2432
#endif
2433

    
2434
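/* Best-fit search over the registered RAM blocks: for every block, find the
 * closest block that starts after it and remember the smallest gap that can
 * still hold `size`; abort if no gap is large enough.
 */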
static ram_addr_t find_ram_offset(ram_addr_t size)
2435
{
2436
    RAMBlock *block, *next_block;
2437
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2438

    
2439
    if (QLIST_EMPTY(&ram_list.blocks))
2440
        return 0;
2441

    
2442
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2443
        ram_addr_t end, next = RAM_ADDR_MAX;
2444

    
2445
        end = block->offset + block->length;
2446

    
2447
        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2448
            if (next_block->offset >= end) {
2449
                next = MIN(next, next_block->offset);
2450
            }
2451
        }
2452
        if (next - end >= size && next - end < mingap) {
2453
            offset = end;
2454
            mingap = next - end;
2455
        }
2456
    }
2457

    
2458
    if (offset == RAM_ADDR_MAX) {
2459
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2460
                (uint64_t)size);
2461
        abort();
2462
    }
2463

    
2464
    return offset;
2465
}
2466

    
2467
static ram_addr_t last_ram_offset(void)
2468
{
2469
    RAMBlock *block;
2470
    ram_addr_t last = 0;
2471

    
2472
    QLIST_FOREACH(block, &ram_list.blocks, next)
2473
        last = MAX(last, block->offset + block->length);
2474

    
2475
    return last;
2476
}
2477

    
2478
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2479
{
2480
    RAMBlock *new_block, *block;
2481

    
2482
    new_block = NULL;
2483
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2484
        if (block->offset == addr) {
2485
            new_block = block;
2486
            break;
2487
        }
2488
    }
2489
    assert(new_block);
2490
    assert(!new_block->idstr[0]);
2491

    
2492
    if (dev) {
2493
        char *id = qdev_get_dev_path(dev);
2494
        if (id) {
2495
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2496
            g_free(id);
2497
        }
2498
    }
2499
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2500

    
2501
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2502
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2503
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2504
                    new_block->idstr);
2505
            abort();
2506
        }
2507
    }
2508
}
2509

    
2510
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2511
                                   MemoryRegion *mr)
2512
{
2513
    RAMBlock *new_block;
2514

    
2515
    size = TARGET_PAGE_ALIGN(size);
2516
    new_block = g_malloc0(sizeof(*new_block));
2517

    
2518
    new_block->mr = mr;
2519
    new_block->offset = find_ram_offset(size);
2520
    if (host) {
2521
        new_block->host = host;
2522
        new_block->flags |= RAM_PREALLOC_MASK;
2523
    } else {
2524
        if (mem_path) {
2525
#if defined (__linux__) && !defined(TARGET_S390X)
2526
            new_block->host = file_ram_alloc(new_block, size, mem_path);
2527
            if (!new_block->host) {
2528
                new_block->host = qemu_vmalloc(size);
2529
                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2530
            }
2531
#else
2532
            fprintf(stderr, "-mem-path option unsupported\n");
2533
            exit(1);
2534
#endif
2535
        } else {
2536
            if (xen_enabled()) {
2537
                xen_ram_alloc(new_block->offset, size, mr);
2538
            } else if (kvm_enabled()) {
2539
                /* some s390/kvm configurations have special constraints */
2540
                new_block->host = kvm_vmalloc(size);
2541
            } else {
2542
                new_block->host = qemu_vmalloc(size);
2543
            }
2544
            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2545
        }
2546
    }
2547
    new_block->length = size;
2548

    
2549
    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2550

    
2551
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2552
                                       last_ram_offset() >> TARGET_PAGE_BITS);
2553
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2554
           0, size >> TARGET_PAGE_BITS);
2555
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2556

    
2557
    if (kvm_enabled())
2558
        kvm_setup_guest_memory(new_block->host, size);
2559

    
2560
    return new_block->offset;
2561
}
2562

    
2563
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2564
{
2565
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
2566
}
2567

    
2568
void qemu_ram_free_from_ptr(ram_addr_t addr)
2569
{
2570
    RAMBlock *block;
2571

    
2572
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2573
        if (addr == block->offset) {
2574
            QLIST_REMOVE(block, next);
2575
            g_free(block);
2576
            return;
2577
        }
2578
    }
2579
}
2580

    
2581
void qemu_ram_free(ram_addr_t addr)
2582
{
2583
    RAMBlock *block;
2584

    
2585
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2586
        if (addr == block->offset) {
2587
            QLIST_REMOVE(block, next);
2588
            if (block->flags & RAM_PREALLOC_MASK) {
2589
                ;
2590
            } else if (mem_path) {
2591
#if defined (__linux__) && !defined(TARGET_S390X)
2592
                if (block->fd) {
2593
                    munmap(block->host, block->length);
2594
                    close(block->fd);
2595
                } else {
2596
                    qemu_vfree(block->host);
2597
                }
2598
#else
2599
                abort();
2600
#endif
2601
            } else {
2602
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2603
                munmap(block->host, block->length);
2604
#else
2605
                if (xen_enabled()) {
2606
                    xen_invalidate_map_cache_entry(block->host);
2607
                } else {
2608
                    qemu_vfree(block->host);
2609
                }
2610
#endif
2611
            }
2612
            g_free(block);
2613
            return;
2614
        }
2615
    }
2616

    
2617
}
2618

    
2619
#ifndef _WIN32
2620
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2621
{
2622
    RAMBlock *block;
2623
    ram_addr_t offset;
2624
    int flags;
2625
    void *area, *vaddr;
2626

    
2627
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2628
        offset = addr - block->offset;
2629
        if (offset < block->length) {
2630
            vaddr = block->host + offset;
2631
            if (block->flags & RAM_PREALLOC_MASK) {
2632
                ;
2633
            } else {
2634
                flags = MAP_FIXED;
2635
                munmap(vaddr, length);
2636
                if (mem_path) {
2637
#if defined(__linux__) && !defined(TARGET_S390X)
2638
                    if (block->fd) {
2639
#ifdef MAP_POPULATE
2640
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2641
                            MAP_PRIVATE;
2642
#else
2643
                        flags |= MAP_PRIVATE;
2644
#endif
2645
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2646
                                    flags, block->fd, offset);
2647
                    } else {
2648
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2649
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2650
                                    flags, -1, 0);
2651
                    }
2652
#else
2653
                    abort();
2654
#endif
2655
                } else {
2656
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2657
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
2658
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2659
                                flags, -1, 0);
2660
#else
2661
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2662
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2663
                                flags, -1, 0);
2664
#endif
2665
                }
2666
                if (area != vaddr) {
2667
                    fprintf(stderr, "Could not remap addr: "
2668
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2669
                            length, addr);
2670
                    exit(1);
2671
                }
2672
                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
2673
            }
2674
            return;
2675
        }
2676
    }
2677
}
2678
#endif /* !_WIN32 */
2679

    
2680
/* Return a host pointer to ram allocated with qemu_ram_alloc.
2681
   With the exception of the softmmu code in this file, this should
2682
   only be used for local memory (e.g. video ram) that the device owns,
2683
   and knows it isn't going to access beyond the end of the block.
2684

2685
   It should not be used for general purpose DMA.
2686
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2687
 */
2688
void *qemu_get_ram_ptr(ram_addr_t addr)
2689
{
2690
    RAMBlock *block;
2691

    
2692
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2693
        if (addr - block->offset < block->length) {
2694
            /* Move this entry to the start of the list.  */
2695
            if (block != QLIST_FIRST(&ram_list.blocks)) {
2696
                QLIST_REMOVE(block, next);
2697
                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2698
            }
2699
            if (xen_enabled()) {
2700
                /* We need to check if the requested address is in the RAM
2701
                 * because we don't want to map the entire memory in QEMU.
2702
                 * In that case just map until the end of the page.
2703
                 */
2704
                if (block->offset == 0) {
2705
                    return xen_map_cache(addr, 0, 0);
2706
                } else if (block->host == NULL) {
2707
                    block->host =
2708
                        xen_map_cache(block->offset, block->length, 1);
2709
                }
2710
            }
2711
            return block->host + (addr - block->offset);
2712
        }
2713
    }
2714

    
2715
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2716
    abort();
2717

    
2718
    return NULL;
2719
}
2720

    
2721
/* Return a host pointer to ram allocated with qemu_ram_alloc.
2722
 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2723
 */
2724
void *qemu_safe_ram_ptr(ram_addr_t addr)
2725
{
2726
    RAMBlock *block;
2727

    
2728
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2729
        if (addr - block->offset < block->length) {
2730
            if (xen_enabled()) {
2731
                /* We need to check if the requested address is in the RAM
2732
                 * because we don't want to map the entire memory in QEMU.
2733
                 * In that case just map until the end of the page.
2734
                 */
2735
                if (block->offset == 0) {
2736
                    return xen_map_cache(addr, 0, 0);
2737
                } else if (block->host == NULL) {
2738
                    block->host =
2739
                        xen_map_cache(block->offset, block->length, 1);
2740
                }
2741
            }
2742
            return block->host + (addr - block->offset);
2743
        }
2744
    }
2745

    
2746
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2747
    abort();
2748

    
2749
    return NULL;
2750
}
2751

    
2752
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2753
 * but takes a size argument */
2754
void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2755
{
2756
    if (*size == 0) {
2757
        return NULL;
2758
    }
2759
    if (xen_enabled()) {
2760
        return xen_map_cache(addr, *size, 1);
2761
    } else {
2762
        RAMBlock *block;
2763

    
2764
        QLIST_FOREACH(block, &ram_list.blocks, next) {
2765
            if (addr - block->offset < block->length) {
2766
                if (addr - block->offset + *size > block->length)
2767
                    *size = block->length - addr + block->offset;
2768
                return block->host + (addr - block->offset);
2769
            }
2770
        }
2771

    
2772
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2773
        abort();
2774
    }
2775
}
2776

    
2777
void qemu_put_ram_ptr(void *addr)
2778
{
2779
    trace_qemu_put_ram_ptr(addr);
2780
}
2781

    
2782
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2783
{
2784
    RAMBlock *block;
2785
    uint8_t *host = ptr;
2786

    
2787
    if (xen_enabled()) {
2788
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
2789
        return 0;
2790
    }
2791

    
2792
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2793
        /* This case occurs when the block is not mapped. */
2794
        if (block->host == NULL) {
2795
            continue;
2796
        }
2797
        if (host - block->host < block->length) {
2798
            *ram_addr = block->offset + (host - block->host);
2799
            return 0;
2800
        }
2801
    }
2802

    
2803
    return -1;
2804
}
2805

    
2806
/* Some of the softmmu routines need to translate from a host pointer
2807
   (typically a TLB entry) back to a ram offset.  */
2808
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2809
{
2810
    ram_addr_t ram_addr;
2811

    
2812
    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2813
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
2814
        abort();
2815
    }
2816
    return ram_addr;
2817
}
2818

    
2819
static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2820
                                    unsigned size)
2821
{
2822
#ifdef DEBUG_UNASSIGNED
2823
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2824
#endif
2825
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2826
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2827
#endif
2828
    return 0;
2829
}
2830

    
2831
static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2832
                                 uint64_t val, unsigned size)
2833
{
2834
#ifdef DEBUG_UNASSIGNED
2835
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2836
#endif
2837
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2838
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2839
#endif
2840
}
2841

    
2842
static const MemoryRegionOps unassigned_mem_ops = {
2843
    .read = unassigned_mem_read,
2844
    .write = unassigned_mem_write,
2845
    .endianness = DEVICE_NATIVE_ENDIAN,
2846
};
2847

    
2848
static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2849
                               unsigned size)
2850
{
2851
    abort();
2852
}
2853

    
2854
static void error_mem_write(void *opaque, target_phys_addr_t addr,
2855
                            uint64_t value, unsigned size)
2856
{
2857
    abort();
2858
}
2859

    
2860
static const MemoryRegionOps error_mem_ops = {
2861
    .read = error_mem_read,
2862
    .write = error_mem_write,
2863
    .endianness = DEVICE_NATIVE_ENDIAN,
2864
};
2865

    
2866
static const MemoryRegionOps rom_mem_ops = {
2867
    .read = error_mem_read,
2868
    .write = unassigned_mem_write,
2869
    .endianness = DEVICE_NATIVE_ENDIAN,
2870
};
2871

    
2872
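/* Slow-path write handler installed for pages that still contain translated
 * code.  The write first invalidates any TBs on the page, then updates the
 * underlying RAM and the dirty flags; only once the page no longer holds
 * translated code (dirty_flags reaches 0xff) is the TLB entry switched back
 * to a plain RAM mapping so that later writes bypass this handler.
 */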
static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2873
                               uint64_t val, unsigned size)
2874
{
2875
    int dirty_flags;
2876
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2877
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2878
#if !defined(CONFIG_USER_ONLY)
2879
        tb_invalidate_phys_page_fast(ram_addr, size);
2880
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2881
#endif
2882
    }
2883
    switch (size) {
2884
    case 1:
2885
        stb_p(qemu_get_ram_ptr(ram_addr), val);
2886
        break;
2887
    case 2:
2888
        stw_p(qemu_get_ram_ptr(ram_addr), val);
2889
        break;
2890
    case 4:
2891
        stl_p(qemu_get_ram_ptr(ram_addr), val);
2892
        break;
2893
    default:
2894
        abort();
2895
    }
2896
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2897
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2898
    /* we remove the notdirty callback only if the code has been
2899
       flushed */
2900
    if (dirty_flags == 0xff)
2901
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2902
}
2903

    
2904
static const MemoryRegionOps notdirty_mem_ops = {
2905
    .read = error_mem_read,
2906
    .write = notdirty_mem_write,
2907
    .endianness = DEVICE_NATIVE_ENDIAN,
2908
};
2909

    
2910
/* Generate a debug exception if a watchpoint has been hit.  */
2911
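/* The handling is two-phase: on the first hit the current TB is invalidated
 * and either the CPU loop exits immediately with EXCP_DEBUG
 * (BP_STOP_BEFORE_ACCESS) or a single-instruction TB is regenerated and
 * execution resumed; when re-entered with watchpoint_hit already set, the
 * function just raises CPU_INTERRUPT_DEBUG so the exception is taken after
 * the current instruction.
 */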
static void check_watchpoint(int offset, int len_mask, int flags)
2912
{
2913
    CPUArchState *env = cpu_single_env;
2914
    target_ulong pc, cs_base;
2915
    TranslationBlock *tb;
2916
    target_ulong vaddr;
2917
    CPUWatchpoint *wp;
2918
    int cpu_flags;
2919

    
2920
    if (env->watchpoint_hit) {
2921
        /* We re-entered the check after replacing the TB. Now raise
2922
         * the debug interrupt so that it will trigger after the
2923
         * current instruction. */
2924
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2925
        return;
2926
    }
2927
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2928
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2929
        if ((vaddr == (wp->vaddr & len_mask) ||
2930
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2931
            wp->flags |= BP_WATCHPOINT_HIT;
2932
            if (!env->watchpoint_hit) {
2933
                env->watchpoint_hit = wp;
2934
                tb = tb_find_pc(env->mem_io_pc);
2935
                if (!tb) {
2936
                    cpu_abort(env, "check_watchpoint: could not find TB for "
2937
                              "pc=%p", (void *)env->mem_io_pc);
2938
                }
2939
                cpu_restore_state(tb, env, env->mem_io_pc);
2940
                tb_phys_invalidate(tb, -1);
2941
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2942
                    env->exception_index = EXCP_DEBUG;
2943
                    cpu_loop_exit(env);
2944
                } else {
2945
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2946
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2947
                    cpu_resume_from_signal(env, NULL);
2948
                }
2949
            }
2950
        } else {
2951
            wp->flags &= ~BP_WATCHPOINT_HIT;
2952
        }
2953
    }
2954
}
2955

    
2956
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2957
   so these check for a hit then pass through to the normal out-of-line
2958
   phys routines.  */
2959
static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
2960
                               unsigned size)
2961
{
2962
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2963
    switch (size) {
2964
    case 1: return ldub_phys(addr);
2965
    case 2: return lduw_phys(addr);
2966
    case 4: return ldl_phys(addr);
2967
    default: abort();
2968
    }
2969
}
2970

    
2971
static void watch_mem_write(void *opaque, target_phys_addr_t addr,
2972
                            uint64_t val, unsigned size)
2973
{
2974
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2975
    switch (size) {
2976
    case 1:
2977
        stb_phys(addr, val);
2978
        break;
2979
    case 2:
2980
        stw_phys(addr, val);
2981
        break;
2982
    case 4:
2983
        stl_phys(addr, val);
2984
        break;
2985
    default: abort();
2986
    }
2987
}
2988

    
2989
static const MemoryRegionOps watch_mem_ops = {
2990
    .read = watch_mem_read,
2991
    .write = watch_mem_write,
2992
    .endianness = DEVICE_NATIVE_ENDIAN,
2993
};
2994

    
2995
static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
2996
                             unsigned len)
2997
{
2998
    subpage_t *mmio = opaque;
2999
    unsigned int idx = SUBPAGE_IDX(addr);
3000
    MemoryRegionSection *section;
3001
#if defined(DEBUG_SUBPAGE)
3002
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3003
           mmio, len, addr, idx);
3004
#endif
3005

    
3006
    section = &phys_sections[mmio->sub_section[idx]];
3007
    addr += mmio->base;
3008
    addr -= section->offset_within_address_space;
3009
    addr += section->offset_within_region;
3010
    return io_mem_read(section->mr, addr, len);
3011
}
3012

    
3013
static void subpage_write(void *opaque, target_phys_addr_t addr,
3014
                          uint64_t value, unsigned len)
3015
{
3016
    subpage_t *mmio = opaque;
3017
    unsigned int idx = SUBPAGE_IDX(addr);
3018
    MemoryRegionSection *section;
3019
#if defined(DEBUG_SUBPAGE)
3020
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3021
           " idx %d value %"PRIx64"\n",
3022
           __func__, mmio, len, addr, idx, value);
3023
#endif
3024

    
3025
    section = &phys_sections[mmio->sub_section[idx]];
3026
    addr += mmio->base;
3027
    addr -= section->offset_within_address_space;
3028
    addr += section->offset_within_region;
3029
    io_mem_write(section->mr, addr, value, len);
3030
}
3031

    
3032
static const MemoryRegionOps subpage_ops = {
3033
    .read = subpage_read,
3034
    .write = subpage_write,
3035
    .endianness = DEVICE_NATIVE_ENDIAN,
3036
};
3037

    
3038
static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3039
                                 unsigned size)
3040
{
3041
    ram_addr_t raddr = addr;
3042
    void *ptr = qemu_get_ram_ptr(raddr);
3043
    switch (size) {
3044
    case 1: return ldub_p(ptr);
3045
    case 2: return lduw_p(ptr);
3046
    case 4: return ldl_p(ptr);
3047
    default: abort();
3048
    }
3049
}
3050

    
3051
static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3052
                              uint64_t value, unsigned size)
3053
{
3054
    ram_addr_t raddr = addr;
3055
    void *ptr = qemu_get_ram_ptr(raddr);
3056
    switch (size) {
3057
    case 1: return stb_p(ptr, value);
3058
    case 2: return stw_p(ptr, value);
3059
    case 4: return stl_p(ptr, value);
3060
    default: abort();
3061
    }
3062
}
3063

    
3064
static const MemoryRegionOps subpage_ram_ops = {
3065
    .read = subpage_ram_read,
3066
    .write = subpage_ram_write,
3067
    .endianness = DEVICE_NATIVE_ENDIAN,
3068
};
3069

    
3070
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3071
                             uint16_t section)
3072
{
3073
    int idx, eidx;
3074

    
3075
    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3076
        return -1;
3077
    idx = SUBPAGE_IDX(start);
3078
    eidx = SUBPAGE_IDX(end);
3079
#if defined(DEBUG_SUBPAGE)
3080
    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3081
           mmio, start, end, idx, eidx, memory);
3082
#endif
3083
    if (memory_region_is_ram(phys_sections[section].mr)) {
3084
        MemoryRegionSection new_section = phys_sections[section];
3085
        new_section.mr = &io_mem_subpage_ram;
3086
        section = phys_section_add(&new_section);
3087
    }
3088
    for (; idx <= eidx; idx++) {
3089
        mmio->sub_section[idx] = section;
3090
    }
3091

    
3092
    return 0;
3093
}
3094

    
3095
static subpage_t *subpage_init(target_phys_addr_t base)
3096
{
3097
    subpage_t *mmio;
3098

    
3099
    mmio = g_malloc0(sizeof(subpage_t));
3100

    
3101
    mmio->base = base;
3102
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3103
                          "subpage", TARGET_PAGE_SIZE);
3104
    mmio->iomem.subpage = true;
3105
#if defined(DEBUG_SUBPAGE)
3106
    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3107
           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3108
#endif
3109
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3110

    
3111
    return mmio;
3112
}
3113

    
3114
static uint16_t dummy_section(MemoryRegion *mr)
3115
{
3116
    MemoryRegionSection section = {
3117
        .mr = mr,
3118
        .offset_within_address_space = 0,
3119
        .offset_within_region = 0,
3120
        .size = UINT64_MAX,
3121
    };
3122

    
3123
    return phys_section_add(&section);
3124
}
3125

    
3126
MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3127
{
3128
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3129
}
3130

    
3131
static void io_mem_init(void)
3132
{
3133
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3134
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3135
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3136
                          "unassigned", UINT64_MAX);
3137
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3138
                          "notdirty", UINT64_MAX);
3139
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3140
                          "subpage-ram", UINT64_MAX);
3141
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3142
                          "watch", UINT64_MAX);
3143
}
3144

    
3145
static void core_begin(MemoryListener *listener)
3146
{
3147
    destroy_all_mappings();
3148
    phys_sections_clear();
3149
    phys_map.ptr = PHYS_MAP_NODE_NIL;
3150
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
3151
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
3152
    phys_section_rom = dummy_section(&io_mem_rom);
3153
    phys_section_watch = dummy_section(&io_mem_watch);
3154
}
3155

    
3156
static void core_commit(MemoryListener *listener)
3157
{
3158
    CPUArchState *env;
3159

    
3160
    /* since each CPU stores ram addresses in its TLB cache, we must
3161
       reset the modified entries */
3162
    /* XXX: slow ! */
3163
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
3164
        tlb_flush(env, 1);
3165
    }
3166
}
3167

    
3168
static void core_region_add(MemoryListener *listener,
3169
                            MemoryRegionSection *section)
3170
{
3171
    cpu_register_physical_memory_log(section, section->readonly);
3172
}
3173

    
3174
static void core_region_del(MemoryListener *listener,
3175
                            MemoryRegionSection *section)
3176
{
3177
}
3178

    
3179
static void core_region_nop(MemoryListener *listener,
3180
                            MemoryRegionSection *section)
3181
{
3182
    cpu_register_physical_memory_log(section, section->readonly);
3183
}
3184

    
3185
static void core_log_start(MemoryListener *listener,
3186
                           MemoryRegionSection *section)
3187
{
3188
}
3189

    
3190
static void core_log_stop(MemoryListener *listener,
3191
                          MemoryRegionSection *section)
3192
{
3193
}
3194

    
3195
static void core_log_sync(MemoryListener *listener,
3196
                          MemoryRegionSection *section)
3197
{
3198
}
3199

    
3200
static void core_log_global_start(MemoryListener *listener)
3201
{
3202
    cpu_physical_memory_set_dirty_tracking(1);
3203
}
3204

    
3205
static void core_log_global_stop(MemoryListener *listener)
3206
{
3207
    cpu_physical_memory_set_dirty_tracking(0);
3208
}
3209

    
3210
static void core_eventfd_add(MemoryListener *listener,
3211
                             MemoryRegionSection *section,
3212
                             bool match_data, uint64_t data, EventNotifier *e)
3213
{
3214
}
3215

    
3216
static void core_eventfd_del(MemoryListener *listener,
3217
                             MemoryRegionSection *section,
3218
                             bool match_data, uint64_t data, EventNotifier *e)
3219
{
3220
}
3221

    
3222
static void io_begin(MemoryListener *listener)
3223
{
3224
}
3225

    
3226
static void io_commit(MemoryListener *listener)
3227
{
3228
}
3229

    
3230
static void io_region_add(MemoryListener *listener,
3231
                          MemoryRegionSection *section)
3232
{
3233
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3234

    
3235
    mrio->mr = section->mr;
3236
    mrio->offset = section->offset_within_region;
3237
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3238
                 section->offset_within_address_space, section->size);
3239
    ioport_register(&mrio->iorange);
3240
}
3241

    
3242
static void io_region_del(MemoryListener *listener,
3243
                          MemoryRegionSection *section)
3244
{
3245
    isa_unassign_ioport(section->offset_within_address_space, section->size);
3246
}
3247

    
3248
static void io_region_nop(MemoryListener *listener,
3249
                          MemoryRegionSection *section)
3250
{
3251
}
3252

    
3253
static void io_log_start(MemoryListener *listener,
3254
                         MemoryRegionSection *section)
3255
{
3256
}
3257

    
3258
static void io_log_stop(MemoryListener *listener,
3259
                        MemoryRegionSection *section)
3260
{
3261
}
3262

    
3263
static void io_log_sync(MemoryListener *listener,
3264
                        MemoryRegionSection *section)
3265
{
3266
}
3267

    
3268
static void io_log_global_start(MemoryListener *listener)
3269
{
3270
}
3271

    
3272
static void io_log_global_stop(MemoryListener *listener)
3273
{
3274
}
3275

    
3276
static void io_eventfd_add(MemoryListener *listener,
3277
                           MemoryRegionSection *section,
3278
                           bool match_data, uint64_t data, EventNotifier *e)
3279
{
3280
}
3281

    
3282
static void io_eventfd_del(MemoryListener *listener,
3283
                           MemoryRegionSection *section,
3284
                           bool match_data, uint64_t data, EventNotifier *e)
3285
{
3286
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .region_add = core_region_add,
    .region_del = core_region_del,
    .region_nop = core_region_nop,
    .log_start = core_log_start,
    .log_stop = core_log_stop,
    .log_sync = core_log_sync,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .eventfd_add = core_eventfd_add,
    .eventfd_del = core_eventfd_del,
    .priority = 0,
};

static MemoryListener io_memory_listener = {
    .begin = io_begin,
    .commit = io_commit,
    .region_add = io_region_add,
    .region_del = io_region_del,
    .region_nop = io_region_nop,
    .log_start = io_log_start,
    .log_stop = io_log_stop,
    .log_sync = io_log_sync,
    .log_global_start = io_log_global_start,
    .log_global_stop = io_log_global_stop,
    .eventfd_add = io_eventfd_add,
    .eventfd_del = io_eventfd_del,
    .priority = 0,
};

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    set_system_memory_map(system_memory);

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    set_system_io_map(system_io);

    memory_listener_register(&core_memory_listener, system_memory);
    memory_listener_register(&io_memory_listener, system_io);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */
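
/* Usage sketch (illustrative only, compiled out): how board code typically
 * plugs RAM into the flat address space managed above.  The region name,
 * size and the memory_region_init_ram() signature are assumptions of this
 * sketch, not taken from this file. */
#if 0
static void example_add_main_ram(void)
{
    MemoryRegion *ram = g_malloc(sizeof(*ram));

    memory_region_init_ram(ram, "example.ram", 128 * 1024 * 1024);
    memory_region_add_subregion(get_system_memory(), 0, ram);
    /* core_memory_listener above then receives a region_add callback and
       the new RAM becomes reachable through cpu_physical_memory_rw(). */
}
#endif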

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else
void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                            int len, int is_write)
{
    int l;
    uint8_t *ptr;
    uint32_t val;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (is_write) {
            if (!memory_region_is_ram(section->mr)) {
                target_phys_addr_t addr1;
                addr1 = memory_region_section_addr(section, addr);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                    l = 2;
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                    l = 1;
                }
            } else if (!section->readonly) {
                ram_addr_t addr1;
                addr1 = memory_region_get_ram_addr(section->mr)
                    + memory_region_section_addr(section, addr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                qemu_put_ram_ptr(ptr);
            }
        } else {
            if (!(memory_region_is_ram(section->mr) ||
                  memory_region_is_romd(section->mr))) {
                target_phys_addr_t addr1;
                /* I/O case */
                addr1 = memory_region_section_addr(section, addr);
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                    l = 2;
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                    l = 1;
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
                                       + memory_region_section_addr(section,
                                                                    addr));
                memcpy(buf, ptr, l);
                qemu_put_ram_ptr(ptr);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
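
/* Usage sketch (illustrative only, compiled out): a device model reading a
 * guest-physical buffer, patching it and writing it back through the slow
 * path above.  The address and length are arbitrary examples. */
#if 0
static void example_phys_rw(target_phys_addr_t gpa)
{
    uint8_t buf[16];

    cpu_physical_memory_rw(gpa, buf, sizeof(buf), 0);   /* read  */
    buf[0] ^= 0xff;
    cpu_physical_memory_rw(gpa, buf, sizeof(buf), 1);   /* write */
}
#endif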

/* used for ROM loading: can write in RAM and ROM */
void cpu_physical_memory_write_rom(target_phys_addr_t addr,
                                   const uint8_t *buf, int len)
{
    int l;
    uint8_t *ptr;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr))) {
            /* do nothing */
        } else {
            unsigned long addr1;
            addr1 = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            qemu_put_ram_ptr(ptr);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
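
/* Usage sketch (illustrative only, compiled out): copying a firmware blob
 * into a readonly region with cpu_physical_memory_write_rom(); a plain
 * cpu_physical_memory_rw() write would be dropped for such regions. */
#if 0
static void example_load_blob(target_phys_addr_t dst,
                              const uint8_t *blob, int blob_len)
{
    cpu_physical_memory_write_rom(dst, blob, blob_len);
}
#endif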

typedef struct {
    void *buffer;
    target_phys_addr_t addr;
    target_phys_addr_t len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *cpu_physical_memory_map(target_phys_addr_t addr,
                              target_phys_addr_t *plen,
                              int is_write)
{
    target_phys_addr_t len = *plen;
    target_phys_addr_t todo = 0;
    int l;
    target_phys_addr_t page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                cpu_physical_memory_read(addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                               int is_write, target_phys_addr_t access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}
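
/* Usage sketch (illustrative only, compiled out): the expected calling
 * pattern for cpu_physical_memory_map()/unmap().  When the bounce buffer
 * is busy the map can fail; cpu_register_map_client() delivers a callback
 * once a retry is likely to succeed.  Names below are hypothetical. */
#if 0
static void example_retry_cb(void *opaque)
{
    /* retry the transfer here */
}

static void example_dma_fill(target_phys_addr_t addr, target_phys_addr_t len)
{
    target_phys_addr_t plen = len;
    void *host = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

    if (!host) {
        cpu_register_map_client(NULL, example_retry_cb);
        return;
    }
    /* plen may be smaller than len if only part of the range was mappable */
    memset(host, 0, plen);
    cpu_physical_memory_unmap(host, plen, 1, plen);
}
#endif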

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(target_phys_addr_t addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
                               & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
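
/* Usage sketch (illustrative only, compiled out): the fixed-endian
 * accessors defined above, as a device model with registers of a defined
 * endianness might use them.  The register layout is made up. */
#if 0
static void example_fixed_endian_access(target_phys_addr_t regs)
{
    uint32_t id  = ldl_le_phys(regs);        /* 32-bit little-endian load */
    uint64_t cfg = ldq_be_phys(regs + 8);    /* 64-bit big-endian load    */

    stw_le_phys(regs + 4, 0x1234);           /* 16-bit little-endian store */
    stl_be_phys(regs + 16, id + (uint32_t)cfg);
}
#endif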

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif
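
/* Usage sketch (illustrative only, compiled out): debugger-style access to
 * a guest virtual address, as the gdb stub performs it.  In the
 * system-emulation version above, writes go through
 * cpu_physical_memory_write_rom() so breakpoints can be patched into ROM. */
#if 0
static uint32_t example_peek_guest_u32(CPUArchState *env, target_ulong va)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(env, va, buf, sizeof(buf), 0) < 0) {
        return 0;   /* page not mapped */
    }
    return ldl_p(buf);
}
#endif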

/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
            && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}

#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
            cross_page,
            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}
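
/* Usage sketch (illustrative only, compiled out): the monitor's "info jit"
 * command ends up in dump_exec_info(); any FILE* plus a printf-like
 * callback matching fprintf_function works. */
#if 0
static void example_dump_jit_stats(void)
{
    dump_exec_info(stderr, fprintf);
}
#endif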

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
{
    MemoryRegionSection *section;

    section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);

    return !(memory_region_is_ram(section->mr) ||
             memory_region_is_romd(section->mr));
}
#endif