1
/*
2
 *  virtual page mapping and translated block handling
3
 *
4
 *  Copyright (c) 2003 Fabrice Bellard
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include "config.h"
20
#ifdef _WIN32
21
#include <windows.h>
22
#else
23
#include <sys/types.h>
24
#include <sys/mman.h>
25
#endif
26

    
27
#include "qemu-common.h"
28
#include "cpu.h"
29
#include "tcg.h"
30
#include "hw/hw.h"
31
#include "hw/qdev.h"
32
#include "osdep.h"
33
#include "kvm.h"
34
#include "hw/xen.h"
35
#include "qemu-timer.h"
36
#include "memory.h"
37
#include "exec-memory.h"
38
#if defined(CONFIG_USER_ONLY)
39
#include <qemu.h>
40
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41
#include <sys/param.h>
42
#if __FreeBSD_version >= 700104
43
#define HAVE_KINFO_GETVMMAP
44
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
45
#include <sys/time.h>
46
#include <sys/proc.h>
47
#include <machine/profile.h>
48
#define _KERNEL
49
#include <sys/user.h>
50
#undef _KERNEL
51
#undef sigqueue
52
#include <libutil.h>
53
#endif
54
#endif
55
#else /* !CONFIG_USER_ONLY */
56
#include "xen-mapcache.h"
57
#include "trace.h"
58
#endif
59

    
60
#include "cputlb.h"
61

    
62
#include "memory-internal.h"
63

    
64
//#define DEBUG_TB_INVALIDATE
65
//#define DEBUG_FLUSH
66
//#define DEBUG_UNASSIGNED
67

    
68
/* make various TB consistency checks */
69
//#define DEBUG_TB_CHECK
70

    
71
//#define DEBUG_IOPORT
72
//#define DEBUG_SUBPAGE
73

    
74
#if !defined(CONFIG_USER_ONLY)
75
/* TB consistency checks only implemented for usermode emulation.  */
76
#undef DEBUG_TB_CHECK
77
#endif
78

    
79
#define SMC_BITMAP_USE_THRESHOLD 10
80

    
81
static TranslationBlock *tbs;
82
static int code_gen_max_blocks;
83
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84
static int nb_tbs;
85
/* any access to the tbs or the page table must use this lock */
86
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
87

    
88
uint8_t *code_gen_prologue;
89
static uint8_t *code_gen_buffer;
90
static size_t code_gen_buffer_size;
91
/* threshold to flush the translated code buffer */
92
static size_t code_gen_buffer_max_size;
93
static uint8_t *code_gen_ptr;
94

    
95
#if !defined(CONFIG_USER_ONLY)
96
int phys_ram_fd;
97
static int in_migration;
98

    
99
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
100

    
101
static MemoryRegion *system_memory;
102
static MemoryRegion *system_io;
103

    
104
AddressSpace address_space_io;
105
AddressSpace address_space_memory;
106

    
107
MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108
static MemoryRegion io_mem_subpage_ram;
109

    
110
#endif
111

    
112
CPUArchState *first_cpu;
113
/* current CPU in the current thread. It is only valid inside
114
   cpu_exec() */
115
DEFINE_TLS(CPUArchState *,cpu_single_env);
116
/* 0 = Do not count executed instructions.
117
   1 = Precise instruction counting.
118
   2 = Adaptive rate instruction counting.  */
119
int use_icount = 0;
120

    
121
typedef struct PageDesc {
122
    /* list of TBs intersecting this ram page */
123
    TranslationBlock *first_tb;
124
    /* in order to optimize self modifying code, we count the number
125
       of lookups we do to a given page to use a bitmap */
126
    unsigned int code_write_count;
127
    uint8_t *code_bitmap;
128
#if defined(CONFIG_USER_ONLY)
129
    unsigned long flags;
130
#endif
131
} PageDesc;
132

    
133
/* In system mode we want L1_MAP to be based on ram offsets,
134
   while in user mode we want it to be based on virtual addresses.  */
135
#if !defined(CONFIG_USER_ONLY)
136
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
138
#else
139
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
140
#endif
141
#else
142
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
143
#endif
144

    
145
/* Size of the L2 (and L3, etc) page tables.  */
146
#define L2_BITS 10
147
#define L2_SIZE (1 << L2_BITS)
148

    
149
#define P_L2_LEVELS \
150
    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
151

    
152
/* The bits remaining after N lower levels of page tables.  */
153
#define V_L1_BITS_REM \
154
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
155

    
156
#if V_L1_BITS_REM < 4
157
#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
158
#else
159
#define V_L1_BITS  V_L1_BITS_REM
160
#endif
161

    
162
#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
163

    
164
#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
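/* Worked example (illustrative values, not tied to any particular target):
   with L1_MAP_ADDR_SPACE_BITS == 64 and 4 KiB target pages
   (TARGET_PAGE_BITS == 12), 52 bits remain to be mapped.  52 % 10 == 2,
   which is < 4, so the remainder is folded into the top level:
   V_L1_BITS == 12, V_L1_SIZE == 4096 entries, V_L1_SHIFT == 40, and four
   10-bit levels sit below the statically allocated l1_map
   (12 + 4 * 10 + 12 == 64).  */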
165

    
166
uintptr_t qemu_real_host_page_size;
167
uintptr_t qemu_host_page_size;
168
uintptr_t qemu_host_page_mask;
169

    
170
/* This is a multi-level map on the virtual address space.
171
   The bottom level has pointers to PageDesc.  */
172
static void *l1_map[V_L1_SIZE];
173

    
174
#if !defined(CONFIG_USER_ONLY)
175

    
176
static MemoryRegionSection *phys_sections;
177
static unsigned phys_sections_nb, phys_sections_nb_alloc;
178
static uint16_t phys_section_unassigned;
179
static uint16_t phys_section_notdirty;
180
static uint16_t phys_section_rom;
181
static uint16_t phys_section_watch;
182

    
183
/* Simple allocator for PhysPageEntry nodes */
184
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
186

    
187
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
188

    
189
static void io_mem_init(void);
190
static void memory_map_init(void);
191

    
192
static MemoryRegion io_mem_watch;
193
#endif
194

    
195
/* statistics */
196
static int tb_flush_count;
197
static int tb_phys_invalidate_count;
198

    
199
#ifdef _WIN32
200
static inline void map_exec(void *addr, long size)
201
{
202
    DWORD old_protect;
203
    VirtualProtect(addr, size,
204
                   PAGE_EXECUTE_READWRITE, &old_protect);
205
    
206
}
207
#else
208
static inline void map_exec(void *addr, long size)
209
{
210
    unsigned long start, end, page_size;
211
    
212
    page_size = getpagesize();
213
    start = (unsigned long)addr;
214
    start &= ~(page_size - 1);
215
    
216
    end = (unsigned long)addr + size;
217
    end += page_size - 1;
218
    end &= ~(page_size - 1);
219
    
220
    mprotect((void *)start, end - start,
221
             PROT_READ | PROT_WRITE | PROT_EXEC);
222
}
223
#endif
224

    
225
static void page_init(void)
226
{
227
    /* NOTE: we can always suppose that qemu_host_page_size >=
228
       TARGET_PAGE_SIZE */
229
#ifdef _WIN32
230
    {
231
        SYSTEM_INFO system_info;
232

    
233
        GetSystemInfo(&system_info);
234
        qemu_real_host_page_size = system_info.dwPageSize;
235
    }
236
#else
237
    qemu_real_host_page_size = getpagesize();
238
#endif
239
    if (qemu_host_page_size == 0)
240
        qemu_host_page_size = qemu_real_host_page_size;
241
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
242
        qemu_host_page_size = TARGET_PAGE_SIZE;
243
    qemu_host_page_mask = ~(qemu_host_page_size - 1);
244

    
245
#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
246
    {
247
#ifdef HAVE_KINFO_GETVMMAP
248
        struct kinfo_vmentry *freep;
249
        int i, cnt;
250

    
251
        freep = kinfo_getvmmap(getpid(), &cnt);
252
        if (freep) {
253
            mmap_lock();
254
            for (i = 0; i < cnt; i++) {
255
                unsigned long startaddr, endaddr;
256

    
257
                startaddr = freep[i].kve_start;
258
                endaddr = freep[i].kve_end;
259
                if (h2g_valid(startaddr)) {
260
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
261

    
262
                    if (h2g_valid(endaddr)) {
263
                        endaddr = h2g(endaddr);
264
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
265
                    } else {
266
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
267
                        endaddr = ~0ul;
268
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
269
#endif
270
                    }
271
                }
272
            }
273
            free(freep);
274
            mmap_unlock();
275
        }
276
#else
277
        FILE *f;
278

    
279
        last_brk = (unsigned long)sbrk(0);
280

    
281
        f = fopen("/compat/linux/proc/self/maps", "r");
282
        if (f) {
283
            mmap_lock();
284

    
285
            do {
286
                unsigned long startaddr, endaddr;
287
                int n;
288

    
289
                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
290

    
291
                if (n == 2 && h2g_valid(startaddr)) {
292
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
293

    
294
                    if (h2g_valid(endaddr)) {
295
                        endaddr = h2g(endaddr);
296
                    } else {
297
                        endaddr = ~0ul;
298
                    }
299
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300
                }
301
            } while (!feof(f));
302

    
303
            fclose(f);
304
            mmap_unlock();
305
        }
306
#endif
307
    }
308
#endif
309
}
310

    
311
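/* Walk the l1_map radix tree and return the PageDesc for the given target
   page index.  The top level is statically allocated; lower levels are
   created lazily, but only when 'alloc' is non-zero -- otherwise a missing
   level makes the lookup return NULL.  In user mode the levels are mmap'ed
   rather than g_malloc'ed to avoid recursing into a locked allocator mutex
   (see the ALLOC macro below).  */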
static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
312
{
313
    PageDesc *pd;
314
    void **lp;
315
    int i;
316

    
317
#if defined(CONFIG_USER_ONLY)
318
    /* We can't use g_malloc because it may recurse into a locked mutex. */
319
# define ALLOC(P, SIZE)                                 \
320
    do {                                                \
321
        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
322
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
323
    } while (0)
324
#else
325
# define ALLOC(P, SIZE) \
326
    do { P = g_malloc0(SIZE); } while (0)
327
#endif
328

    
329
    /* Level 1.  Always allocated.  */
330
    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
331

    
332
    /* Level 2..N-1.  */
333
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
334
        void **p = *lp;
335

    
336
        if (p == NULL) {
337
            if (!alloc) {
338
                return NULL;
339
            }
340
            ALLOC(p, sizeof(void *) * L2_SIZE);
341
            *lp = p;
342
        }
343

    
344
        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
345
    }
346

    
347
    pd = *lp;
348
    if (pd == NULL) {
349
        if (!alloc) {
350
            return NULL;
351
        }
352
        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
353
        *lp = pd;
354
    }
355

    
356
#undef ALLOC
357

    
358
    return pd + (index & (L2_SIZE - 1));
359
}
360

    
361
static inline PageDesc *page_find(tb_page_addr_t index)
362
{
363
    return page_find_alloc(index, 0);
364
}
365

    
366
#if !defined(CONFIG_USER_ONLY)
367

    
368
static void phys_map_node_reserve(unsigned nodes)
369
{
370
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
371
        typedef PhysPageEntry Node[L2_SIZE];
372
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
373
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
374
                                      phys_map_nodes_nb + nodes);
375
        phys_map_nodes = g_renew(Node, phys_map_nodes,
376
                                 phys_map_nodes_nb_alloc);
377
    }
378
}
379

    
380
static uint16_t phys_map_node_alloc(void)
381
{
382
    unsigned i;
383
    uint16_t ret;
384

    
385
    ret = phys_map_nodes_nb++;
386
    assert(ret != PHYS_MAP_NODE_NIL);
387
    assert(ret != phys_map_nodes_nb_alloc);
388
    for (i = 0; i < L2_SIZE; ++i) {
389
        phys_map_nodes[ret][i].is_leaf = 0;
390
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
391
    }
392
    return ret;
393
}
394

    
395
static void phys_map_nodes_reset(void)
396
{
397
    phys_map_nodes_nb = 0;
398
}
399

    
400

    
401
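/* Recursive helper for phys_page_set(): descend the PhysPageEntry radix
   tree, allocating nodes from phys_map_nodes as needed (a freshly allocated
   bottom-level node is pre-filled with phys_section_unassigned leaves).
   Whenever the remaining [*index, *index + *nb) range covers a whole aligned
   subtree at this level, the entry becomes a leaf pointing at 'leaf'
   directly; otherwise we recurse one level down.  *index and *nb are
   advanced as pages are consumed.  */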
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
402
                                hwaddr *nb, uint16_t leaf,
403
                                int level)
404
{
405
    PhysPageEntry *p;
406
    int i;
407
    hwaddr step = (hwaddr)1 << (level * L2_BITS);
408

    
409
    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
410
        lp->ptr = phys_map_node_alloc();
411
        p = phys_map_nodes[lp->ptr];
412
        if (level == 0) {
413
            for (i = 0; i < L2_SIZE; i++) {
414
                p[i].is_leaf = 1;
415
                p[i].ptr = phys_section_unassigned;
416
            }
417
        }
418
    } else {
419
        p = phys_map_nodes[lp->ptr];
420
    }
421
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
422

    
423
    while (*nb && lp < &p[L2_SIZE]) {
424
        if ((*index & (step - 1)) == 0 && *nb >= step) {
425
            lp->is_leaf = true;
426
            lp->ptr = leaf;
427
            *index += step;
428
            *nb -= step;
429
        } else {
430
            phys_page_set_level(lp, index, nb, leaf, level - 1);
431
        }
432
        ++lp;
433
    }
434
}
435

    
436
static void phys_page_set(AddressSpaceDispatch *d,
437
                          hwaddr index, hwaddr nb,
438
                          uint16_t leaf)
439
{
440
    /* Wildly overreserve - it doesn't matter much. */
441
    phys_map_node_reserve(3 * P_L2_LEVELS);
442

    
443
    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
444
}
445

    
446
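/* Look up the MemoryRegionSection covering physical page 'index'.  The walk
   consumes L2_BITS of the index per level, starting at d->phys_map; if a
   PHYS_MAP_NODE_NIL pointer is reached before a leaf, the page is unmapped
   and the unassigned section is returned instead.  */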
MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
447
{
448
    PhysPageEntry lp = d->phys_map;
449
    PhysPageEntry *p;
450
    int i;
451
    uint16_t s_index = phys_section_unassigned;
452

    
453
    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
454
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
455
            goto not_found;
456
        }
457
        p = phys_map_nodes[lp.ptr];
458
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
459
    }
460

    
461
    s_index = lp.ptr;
462
not_found:
463
    return &phys_sections[s_index];
464
}
465

    
466
bool memory_region_is_unassigned(MemoryRegion *mr)
467
{
468
    return mr != &io_mem_ram && mr != &io_mem_rom
469
        && mr != &io_mem_notdirty && !mr->rom_device
470
        && mr != &io_mem_watch;
471
}
472

    
473
#define mmap_lock() do { } while(0)
474
#define mmap_unlock() do { } while(0)
475
#endif
476

    
477
#if defined(CONFIG_USER_ONLY)
478
/* Currently it is not recommended to allocate big chunks of data in
479
   user mode. This will change when a dedicated libc is used.  */
480
/* ??? 64-bit hosts ought to have no problem mmaping data outside the
481
   region in which the guest needs to run.  Revisit this.  */
482
#define USE_STATIC_CODE_GEN_BUFFER
483
#endif
484

    
485
/* ??? Should configure for this, not list operating systems here.  */
486
#if (defined(__linux__) \
487
    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
488
    || defined(__DragonFly__) || defined(__OpenBSD__) \
489
    || defined(__NetBSD__))
490
# define USE_MMAP
491
#endif
492

    
493
/* Minimum size of the code gen buffer.  This number is randomly chosen,
494
   but not so small that we can't have a fair number of TB's live.  */
495
#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
496

    
497
/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
498
   indicated, this is constrained by the range of direct branches on the
499
   host cpu, as used by the TCG implementation of goto_tb.  */
500
#if defined(__x86_64__)
501
# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
502
#elif defined(__sparc__)
503
# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
504
#elif defined(__arm__)
505
# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
506
#elif defined(__s390x__)
507
  /* We have a +- 4GB range on the branches; leave some slop.  */
508
# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
509
#else
510
# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
511
#endif
512

    
513
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
514

    
515
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
516
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
517
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
518

    
519
static inline size_t size_code_gen_buffer(size_t tb_size)
520
{
521
    /* Size the buffer.  */
522
    if (tb_size == 0) {
523
#ifdef USE_STATIC_CODE_GEN_BUFFER
524
        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
525
#else
526
        /* ??? Needs adjustments.  */
527
        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
528
           static buffer, we could size this on RESERVED_VA, on the text
529
           segment size of the executable, or continue to use the default.  */
530
        tb_size = (unsigned long)(ram_size / 4);
531
#endif
532
    }
533
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
534
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
535
    }
536
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
537
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
538
    }
539
    code_gen_buffer_size = tb_size;
540
    return tb_size;
541
}
542

    
543
#ifdef USE_STATIC_CODE_GEN_BUFFER
544
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
545
    __attribute__((aligned(CODE_GEN_ALIGN)));
546

    
547
static inline void *alloc_code_gen_buffer(void)
548
{
549
    map_exec(static_code_gen_buffer, code_gen_buffer_size);
550
    return static_code_gen_buffer;
551
}
552
#elif defined(USE_MMAP)
553
static inline void *alloc_code_gen_buffer(void)
554
{
555
    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
556
    uintptr_t start = 0;
557
    void *buf;
558

    
559
    /* Constrain the position of the buffer based on the host cpu.
560
       Note that these addresses are chosen in concert with the
561
       addresses assigned in the relevant linker script file.  */
562
# if defined(__PIE__) || defined(__PIC__)
563
    /* Don't bother setting a preferred location if we're building
564
       a position-independent executable.  We're more likely to get
565
       an address near the main executable if we let the kernel
566
       choose the address.  */
567
# elif defined(__x86_64__) && defined(MAP_32BIT)
568
    /* Force the memory down into low memory with the executable.
569
       Leave the choice of exact location with the kernel.  */
570
    flags |= MAP_32BIT;
571
    /* Cannot expect to map more than 800MB in low memory.  */
572
    if (code_gen_buffer_size > 800u * 1024 * 1024) {
573
        code_gen_buffer_size = 800u * 1024 * 1024;
574
    }
575
# elif defined(__sparc__)
576
    start = 0x40000000ul;
577
# elif defined(__s390x__)
578
    start = 0x90000000ul;
579
# endif
580

    
581
    buf = mmap((void *)start, code_gen_buffer_size,
582
               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
583
    return buf == MAP_FAILED ? NULL : buf;
584
}
585
#else
586
static inline void *alloc_code_gen_buffer(void)
587
{
588
    void *buf = g_malloc(code_gen_buffer_size);
589
    if (buf) {
590
        map_exec(buf, code_gen_buffer_size);
591
    }
592
    return buf;
593
}
594
#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
595

    
596
static inline void code_gen_alloc(size_t tb_size)
597
{
598
    code_gen_buffer_size = size_code_gen_buffer(tb_size);
599
    code_gen_buffer = alloc_code_gen_buffer();
600
    if (code_gen_buffer == NULL) {
601
        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
602
        exit(1);
603
    }
604

    
605
    /* Steal room for the prologue at the end of the buffer.  This ensures
606
       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
607
       from TB's to the prologue are going to be in range.  It also means
608
       that we don't need to mark (additional) portions of the data segment
609
       as executable.  */
610
    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
611
    code_gen_buffer_size -= 1024;
612

    
613
    code_gen_buffer_max_size = code_gen_buffer_size -
614
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
615
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
616
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
617
}
618

    
619
/* Must be called before using the QEMU cpus. 'tb_size' is the size
620
   (in bytes) allocated to the translation buffer. Zero means default
621
   size. */
622
void tcg_exec_init(unsigned long tb_size)
623
{
624
    cpu_gen_init();
625
    code_gen_alloc(tb_size);
626
    code_gen_ptr = code_gen_buffer;
627
    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
628
    page_init();
629
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
630
    /* There's no guest base to take into account, so go ahead and
631
       initialize the prologue now.  */
632
    tcg_prologue_init(&tcg_ctx);
633
#endif
634
}
635

    
636
bool tcg_enabled(void)
637
{
638
    return code_gen_buffer != NULL;
639
}
640

    
641
void cpu_exec_init_all(void)
642
{
643
#if !defined(CONFIG_USER_ONLY)
644
    memory_map_init();
645
    io_mem_init();
646
#endif
647
}
648

    
649
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
650

    
651
static int cpu_common_post_load(void *opaque, int version_id)
652
{
653
    CPUArchState *env = opaque;
654

    
655
    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
656
       version_id is increased. */
657
    env->interrupt_request &= ~0x01;
658
    tlb_flush(env, 1);
659

    
660
    return 0;
661
}
662

    
663
static const VMStateDescription vmstate_cpu_common = {
664
    .name = "cpu_common",
665
    .version_id = 1,
666
    .minimum_version_id = 1,
667
    .minimum_version_id_old = 1,
668
    .post_load = cpu_common_post_load,
669
    .fields      = (VMStateField []) {
670
        VMSTATE_UINT32(halted, CPUArchState),
671
        VMSTATE_UINT32(interrupt_request, CPUArchState),
672
        VMSTATE_END_OF_LIST()
673
    }
674
};
675
#endif
676

    
677
CPUArchState *qemu_get_cpu(int cpu)
678
{
679
    CPUArchState *env = first_cpu;
680

    
681
    while (env) {
682
        if (env->cpu_index == cpu)
683
            break;
684
        env = env->next_cpu;
685
    }
686

    
687
    return env;
688
}
689

    
690
void cpu_exec_init(CPUArchState *env)
691
{
692
    CPUArchState **penv;
693
    int cpu_index;
694

    
695
#if defined(CONFIG_USER_ONLY)
696
    cpu_list_lock();
697
#endif
698
    env->next_cpu = NULL;
699
    penv = &first_cpu;
700
    cpu_index = 0;
701
    while (*penv != NULL) {
702
        penv = &(*penv)->next_cpu;
703
        cpu_index++;
704
    }
705
    env->cpu_index = cpu_index;
706
    env->numa_node = 0;
707
    QTAILQ_INIT(&env->breakpoints);
708
    QTAILQ_INIT(&env->watchpoints);
709
#ifndef CONFIG_USER_ONLY
710
    env->thread_id = qemu_get_thread_id();
711
#endif
712
    *penv = env;
713
#if defined(CONFIG_USER_ONLY)
714
    cpu_list_unlock();
715
#endif
716
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
717
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
718
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
719
                    cpu_save, cpu_load, env);
720
#endif
721
}
722

    
723
/* Allocate a new translation block. Flush the translation buffer if
724
   too many translation blocks or too much generated code. */
725
static TranslationBlock *tb_alloc(target_ulong pc)
726
{
727
    TranslationBlock *tb;
728

    
729
    if (nb_tbs >= code_gen_max_blocks ||
730
        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
731
        return NULL;
732
    tb = &tbs[nb_tbs++];
733
    tb->pc = pc;
734
    tb->cflags = 0;
735
    return tb;
736
}
737

    
738
void tb_free(TranslationBlock *tb)
739
{
740
    /* In practice this is mostly used for single-use temporary TBs.
741
       Ignore the hard cases and just back up if this TB happens to
742
       be the last one generated.  */
743
    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
744
        code_gen_ptr = tb->tc_ptr;
745
        nb_tbs--;
746
    }
747
}
748

    
749
static inline void invalidate_page_bitmap(PageDesc *p)
750
{
751
    if (p->code_bitmap) {
752
        g_free(p->code_bitmap);
753
        p->code_bitmap = NULL;
754
    }
755
    p->code_write_count = 0;
756
}
757

    
758
/* Set to NULL all the 'first_tb' fields in all PageDescs. */
759

    
760
static void page_flush_tb_1 (int level, void **lp)
761
{
762
    int i;
763

    
764
    if (*lp == NULL) {
765
        return;
766
    }
767
    if (level == 0) {
768
        PageDesc *pd = *lp;
769
        for (i = 0; i < L2_SIZE; ++i) {
770
            pd[i].first_tb = NULL;
771
            invalidate_page_bitmap(pd + i);
772
        }
773
    } else {
774
        void **pp = *lp;
775
        for (i = 0; i < L2_SIZE; ++i) {
776
            page_flush_tb_1 (level - 1, pp + i);
777
        }
778
    }
779
}
780

    
781
static void page_flush_tb(void)
782
{
783
    int i;
784
    for (i = 0; i < V_L1_SIZE; i++) {
785
        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
786
    }
787
}
788

    
789
/* flush all the translation blocks */
790
/* XXX: tb_flush is currently not thread safe */
791
void tb_flush(CPUArchState *env1)
792
{
793
    CPUArchState *env;
794
#if defined(DEBUG_FLUSH)
795
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
796
           (unsigned long)(code_gen_ptr - code_gen_buffer),
797
           nb_tbs, nb_tbs > 0 ?
798
           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
799
#endif
800
    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
801
        cpu_abort(env1, "Internal error: code buffer overflow\n");
802

    
803
    nb_tbs = 0;
804

    
805
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
806
        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
807
    }
808

    
809
    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
810
    page_flush_tb();
811

    
812
    code_gen_ptr = code_gen_buffer;
813
    /* XXX: flush processor icache at this point if cache flush is
814
       expensive */
815
    tb_flush_count++;
816
}
817

    
818
#ifdef DEBUG_TB_CHECK
819

    
820
static void tb_invalidate_check(target_ulong address)
821
{
822
    TranslationBlock *tb;
823
    int i;
824
    address &= TARGET_PAGE_MASK;
825
    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
826
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
827
            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
828
                  address >= tb->pc + tb->size)) {
829
                printf("ERROR invalidate: address=" TARGET_FMT_lx
830
                       " PC=%08lx size=%04x\n",
831
                       address, (long)tb->pc, tb->size);
832
            }
833
        }
834
    }
835
}
836

    
837
/* verify that all the pages have correct rights for code */
838
static void tb_page_check(void)
839
{
840
    TranslationBlock *tb;
841
    int i, flags1, flags2;
842

    
843
    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
844
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
845
            flags1 = page_get_flags(tb->pc);
846
            flags2 = page_get_flags(tb->pc + tb->size - 1);
847
            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
848
                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
849
                       (long)tb->pc, tb->size, flags1, flags2);
850
            }
851
        }
852
    }
853
}
854

    
855
#endif
856

    
857
/* invalidate one TB */
858
static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
859
                             int next_offset)
860
{
861
    TranslationBlock *tb1;
862
    for(;;) {
863
        tb1 = *ptb;
864
        if (tb1 == tb) {
865
            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
866
            break;
867
        }
868
        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
869
    }
870
}
871

    
872
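/* The page_next[] and jmp_next[]/jmp_first links hold tagged pointers: the
   low two bits encode which slot of the pointed-to TB the link belongs to
   (0 or 1), and the value 2 marks the head of a circular jump list (see
   tb_link_page(), which sets jmp_first to 'tb | 2').  The helpers below
   strip the tag with '& ~3' before dereferencing.  */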
static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
873
{
874
    TranslationBlock *tb1;
875
    unsigned int n1;
876

    
877
    for(;;) {
878
        tb1 = *ptb;
879
        n1 = (uintptr_t)tb1 & 3;
880
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
881
        if (tb1 == tb) {
882
            *ptb = tb1->page_next[n1];
883
            break;
884
        }
885
        ptb = &tb1->page_next[n1];
886
    }
887
}
888

    
889
static inline void tb_jmp_remove(TranslationBlock *tb, int n)
890
{
891
    TranslationBlock *tb1, **ptb;
892
    unsigned int n1;
893

    
894
    ptb = &tb->jmp_next[n];
895
    tb1 = *ptb;
896
    if (tb1) {
897
        /* find tb(n) in circular list */
898
        for(;;) {
899
            tb1 = *ptb;
900
            n1 = (uintptr_t)tb1 & 3;
901
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
902
            if (n1 == n && tb1 == tb)
903
                break;
904
            if (n1 == 2) {
905
                ptb = &tb1->jmp_first;
906
            } else {
907
                ptb = &tb1->jmp_next[n1];
908
            }
909
        }
910
        /* now we can suppress tb(n) from the list */
911
        *ptb = tb->jmp_next[n];
912

    
913
        tb->jmp_next[n] = NULL;
914
    }
915
}
916

    
917
/* reset the jump entry 'n' of a TB so that it is not chained to
918
   another TB */
919
static inline void tb_reset_jump(TranslationBlock *tb, int n)
920
{
921
    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
922
}
923

    
924
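/* Remove a TB from every structure that can still reach it: the physical
   hash table, the per-page TB lists, each CPU's tb_jmp_cache, its own
   outgoing jump lists, and finally any other TBs whose chained jumps target
   it (those jumps are reset so they no longer chain to this TB).  The
   generated host code itself is not reclaimed here; that only happens on a
   full tb_flush().  */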
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
925
{
926
    CPUArchState *env;
927
    PageDesc *p;
928
    unsigned int h, n1;
929
    tb_page_addr_t phys_pc;
930
    TranslationBlock *tb1, *tb2;
931

    
932
    /* remove the TB from the hash list */
933
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
934
    h = tb_phys_hash_func(phys_pc);
935
    tb_remove(&tb_phys_hash[h], tb,
936
              offsetof(TranslationBlock, phys_hash_next));
937

    
938
    /* remove the TB from the page list */
939
    if (tb->page_addr[0] != page_addr) {
940
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
941
        tb_page_remove(&p->first_tb, tb);
942
        invalidate_page_bitmap(p);
943
    }
944
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
945
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
946
        tb_page_remove(&p->first_tb, tb);
947
        invalidate_page_bitmap(p);
948
    }
949

    
950
    tb_invalidated_flag = 1;
951

    
952
    /* remove the TB from each CPU's tb_jmp_cache */
953
    h = tb_jmp_cache_hash_func(tb->pc);
954
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
955
        if (env->tb_jmp_cache[h] == tb)
956
            env->tb_jmp_cache[h] = NULL;
957
    }
958

    
959
    /* suppress this TB from the two jump lists */
960
    tb_jmp_remove(tb, 0);
961
    tb_jmp_remove(tb, 1);
962

    
963
    /* suppress any remaining jumps to this TB */
964
    tb1 = tb->jmp_first;
965
    for(;;) {
966
        n1 = (uintptr_t)tb1 & 3;
967
        if (n1 == 2)
968
            break;
969
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
970
        tb2 = tb1->jmp_next[n1];
971
        tb_reset_jump(tb1, n1);
972
        tb1->jmp_next[n1] = NULL;
973
        tb1 = tb2;
974
    }
975
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
976

    
977
    tb_phys_invalidate_count++;
978
}
979

    
980
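/* Set bits [start, start + len) in the bit array 'tab', handling a partial
   first byte, whole bytes in between, and a partial last byte.  For example,
   start = 5, len = 7 marks bits 5..11: byte 0 is OR'ed with 0xe0 and byte 1
   with 0x0f.  */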
static inline void set_bits(uint8_t *tab, int start, int len)
981
{
982
    int end, mask, end1;
983

    
984
    end = start + len;
985
    tab += start >> 3;
986
    mask = 0xff << (start & 7);
987
    if ((start & ~7) == (end & ~7)) {
988
        if (start < end) {
989
            mask &= ~(0xff << (end & 7));
990
            *tab |= mask;
991
        }
992
    } else {
993
        *tab++ |= mask;
994
        start = (start + 8) & ~7;
995
        end1 = end & ~7;
996
        while (start < end1) {
997
            *tab++ = 0xff;
998
            start += 8;
999
        }
1000
        if (start < end) {
1001
            mask = ~(0xff << (end & 7));
1002
            *tab |= mask;
1003
        }
1004
    }
1005
}
1006

    
1007
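/* Build the self-modifying-code bitmap for a page: one bit per byte of the
   target page, set wherever translated code was read from.  It is only built
   once the page has seen SMC_BITMAP_USE_THRESHOLD invalidating writes (see
   tb_invalidate_phys_page_range()) and lets tb_invalidate_phys_page_fast()
   skip writes that do not overlap translated code.  */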
static void build_page_bitmap(PageDesc *p)
1008
{
1009
    int n, tb_start, tb_end;
1010
    TranslationBlock *tb;
1011

    
1012
    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1013

    
1014
    tb = p->first_tb;
1015
    while (tb != NULL) {
1016
        n = (uintptr_t)tb & 3;
1017
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1018
        /* NOTE: this is subtle as a TB may span two physical pages */
1019
        if (n == 0) {
1020
            /* NOTE: tb_end may be after the end of the page, but
1021
               it is not a problem */
1022
            tb_start = tb->pc & ~TARGET_PAGE_MASK;
1023
            tb_end = tb_start + tb->size;
1024
            if (tb_end > TARGET_PAGE_SIZE)
1025
                tb_end = TARGET_PAGE_SIZE;
1026
        } else {
1027
            tb_start = 0;
1028
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1029
        }
1030
        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1031
        tb = tb->page_next[n];
1032
    }
1033
}
1034

    
1035
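/* Translate and register a new TB for guest code starting at pc.  If the TB
   pool or the code buffer is exhausted, everything is flushed and the
   allocation retried (which is why the second tb_alloc() cannot fail).  If
   the translated guest code crosses a target page boundary, the second
   physical page is recorded too, so that writes to either page invalidate
   the TB.  */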
TranslationBlock *tb_gen_code(CPUArchState *env,
1036
                              target_ulong pc, target_ulong cs_base,
1037
                              int flags, int cflags)
1038
{
1039
    TranslationBlock *tb;
1040
    uint8_t *tc_ptr;
1041
    tb_page_addr_t phys_pc, phys_page2;
1042
    target_ulong virt_page2;
1043
    int code_gen_size;
1044

    
1045
    phys_pc = get_page_addr_code(env, pc);
1046
    tb = tb_alloc(pc);
1047
    if (!tb) {
1048
        /* flush must be done */
1049
        tb_flush(env);
1050
        /* cannot fail at this point */
1051
        tb = tb_alloc(pc);
1052
        /* Don't forget to invalidate previous TB info.  */
1053
        tb_invalidated_flag = 1;
1054
    }
1055
    tc_ptr = code_gen_ptr;
1056
    tb->tc_ptr = tc_ptr;
1057
    tb->cs_base = cs_base;
1058
    tb->flags = flags;
1059
    tb->cflags = cflags;
1060
    cpu_gen_code(env, tb, &code_gen_size);
1061
    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1062
                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1063

    
1064
    /* check next page if needed */
1065
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1066
    phys_page2 = -1;
1067
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1068
        phys_page2 = get_page_addr_code(env, virt_page2);
1069
    }
1070
    tb_link_page(tb, phys_pc, phys_page2);
1071
    return tb;
1072
}
1073

    
1074
/*
1075
 * Invalidate all TBs which intersect with the target physical address range
1076
 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1077
 * 'is_cpu_write_access' should be true if called from a real cpu write
1078
 * access: the virtual CPU will exit the current TB if code is modified inside
1079
 * this TB.
1080
 */
1081
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1082
                              int is_cpu_write_access)
1083
{
1084
    while (start < end) {
1085
        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1086
        start &= TARGET_PAGE_MASK;
1087
        start += TARGET_PAGE_SIZE;
1088
    }
1089
}
1090

    
1091
/*
1092
 * Invalidate all TBs which intersect with the target physical address range
1093
 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1094
 * 'is_cpu_write_access' should be true if called from a real cpu write
1095
 * access: the virtual CPU will exit the current TB if code is modified inside
1096
 * this TB.
1097
 */
1098
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1099
                                   int is_cpu_write_access)
1100
{
1101
    TranslationBlock *tb, *tb_next, *saved_tb;
1102
    CPUArchState *env = cpu_single_env;
1103
    tb_page_addr_t tb_start, tb_end;
1104
    PageDesc *p;
1105
    int n;
1106
#ifdef TARGET_HAS_PRECISE_SMC
1107
    int current_tb_not_found = is_cpu_write_access;
1108
    TranslationBlock *current_tb = NULL;
1109
    int current_tb_modified = 0;
1110
    target_ulong current_pc = 0;
1111
    target_ulong current_cs_base = 0;
1112
    int current_flags = 0;
1113
#endif /* TARGET_HAS_PRECISE_SMC */
1114

    
1115
    p = page_find(start >> TARGET_PAGE_BITS);
1116
    if (!p)
1117
        return;
1118
    if (!p->code_bitmap &&
1119
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1120
        is_cpu_write_access) {
1121
        /* build code bitmap */
1122
        build_page_bitmap(p);
1123
    }
1124

    
1125
    /* we remove all the TBs in the range [start, end[ */
1126
    /* XXX: see if in some cases it could be faster to invalidate all the code */
1127
    tb = p->first_tb;
1128
    while (tb != NULL) {
1129
        n = (uintptr_t)tb & 3;
1130
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1131
        tb_next = tb->page_next[n];
1132
        /* NOTE: this is subtle as a TB may span two physical pages */
1133
        if (n == 0) {
1134
            /* NOTE: tb_end may be after the end of the page, but
1135
               it is not a problem */
1136
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1137
            tb_end = tb_start + tb->size;
1138
        } else {
1139
            tb_start = tb->page_addr[1];
1140
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1141
        }
1142
        if (!(tb_end <= start || tb_start >= end)) {
1143
#ifdef TARGET_HAS_PRECISE_SMC
1144
            if (current_tb_not_found) {
1145
                current_tb_not_found = 0;
1146
                current_tb = NULL;
1147
                if (env->mem_io_pc) {
1148
                    /* now we have a real cpu fault */
1149
                    current_tb = tb_find_pc(env->mem_io_pc);
1150
                }
1151
            }
1152
            if (current_tb == tb &&
1153
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
1154
                /* If we are modifying the current TB, we must stop
1155
                its execution. We could be more precise by checking
1156
                that the modification is after the current PC, but it
1157
                would require a specialized function to partially
1158
                restore the CPU state */
1159

    
1160
                current_tb_modified = 1;
1161
                cpu_restore_state(current_tb, env, env->mem_io_pc);
1162
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1163
                                     &current_flags);
1164
            }
1165
#endif /* TARGET_HAS_PRECISE_SMC */
1166
            /* we need to do that to handle the case where a signal
1167
               occurs while doing tb_phys_invalidate() */
1168
            saved_tb = NULL;
1169
            if (env) {
1170
                saved_tb = env->current_tb;
1171
                env->current_tb = NULL;
1172
            }
1173
            tb_phys_invalidate(tb, -1);
1174
            if (env) {
1175
                env->current_tb = saved_tb;
1176
                if (env->interrupt_request && env->current_tb)
1177
                    cpu_interrupt(env, env->interrupt_request);
1178
            }
1179
        }
1180
        tb = tb_next;
1181
    }
1182
#if !defined(CONFIG_USER_ONLY)
1183
    /* if no code remaining, no need to continue to use slow writes */
1184
    if (!p->first_tb) {
1185
        invalidate_page_bitmap(p);
1186
        if (is_cpu_write_access) {
1187
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1188
        }
1189
    }
1190
#endif
1191
#ifdef TARGET_HAS_PRECISE_SMC
1192
    if (current_tb_modified) {
1193
        /* we generate a block containing just the instruction
1194
           modifying the memory. It will ensure that it cannot modify
1195
           itself */
1196
        env->current_tb = NULL;
1197
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1198
        cpu_resume_from_signal(env, NULL);
1199
    }
1200
#endif
1201
}
1202

    
1203
/* len must be <= 8 and start must be a multiple of len */
1204
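/* Fast path for small writes to a page that may contain translated code: if
   the page has a code bitmap, fall back to the full (and expensive)
   tb_invalidate_phys_page_range() only when the written bytes actually
   overlap translated code.  */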
static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1205
{
1206
    PageDesc *p;
1207
    int offset, b;
1208
#if 0
1209
    if (1) {
1210
        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1211
                  cpu_single_env->mem_io_vaddr, len,
1212
                  cpu_single_env->eip,
1213
                  cpu_single_env->eip +
1214
                  (intptr_t)cpu_single_env->segs[R_CS].base);
1215
    }
1216
#endif
1217
    p = page_find(start >> TARGET_PAGE_BITS);
1218
    if (!p)
1219
        return;
1220
    if (p->code_bitmap) {
1221
        offset = start & ~TARGET_PAGE_MASK;
1222
        b = p->code_bitmap[offset >> 3] >> (offset & 7);
1223
        if (b & ((1 << len) - 1))
1224
            goto do_invalidate;
1225
    } else {
1226
    do_invalidate:
1227
        tb_invalidate_phys_page_range(start, start + len, 1);
1228
    }
1229
}
1230

    
1231
#if !defined(CONFIG_SOFTMMU)
1232
static void tb_invalidate_phys_page(tb_page_addr_t addr,
1233
                                    uintptr_t pc, void *puc)
1234
{
1235
    TranslationBlock *tb;
1236
    PageDesc *p;
1237
    int n;
1238
#ifdef TARGET_HAS_PRECISE_SMC
1239
    TranslationBlock *current_tb = NULL;
1240
    CPUArchState *env = cpu_single_env;
1241
    int current_tb_modified = 0;
1242
    target_ulong current_pc = 0;
1243
    target_ulong current_cs_base = 0;
1244
    int current_flags = 0;
1245
#endif
1246

    
1247
    addr &= TARGET_PAGE_MASK;
1248
    p = page_find(addr >> TARGET_PAGE_BITS);
1249
    if (!p)
1250
        return;
1251
    tb = p->first_tb;
1252
#ifdef TARGET_HAS_PRECISE_SMC
1253
    if (tb && pc != 0) {
1254
        current_tb = tb_find_pc(pc);
1255
    }
1256
#endif
1257
    while (tb != NULL) {
1258
        n = (uintptr_t)tb & 3;
1259
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1260
#ifdef TARGET_HAS_PRECISE_SMC
1261
        if (current_tb == tb &&
1262
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
1263
                /* If we are modifying the current TB, we must stop
1264
                   its execution. We could be more precise by checking
1265
                   that the modification is after the current PC, but it
1266
                   would require a specialized function to partially
1267
                   restore the CPU state */
1268

    
1269
            current_tb_modified = 1;
1270
            cpu_restore_state(current_tb, env, pc);
1271
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1272
                                 &current_flags);
1273
        }
1274
#endif /* TARGET_HAS_PRECISE_SMC */
1275
        tb_phys_invalidate(tb, addr);
1276
        tb = tb->page_next[n];
1277
    }
1278
    p->first_tb = NULL;
1279
#ifdef TARGET_HAS_PRECISE_SMC
1280
    if (current_tb_modified) {
1281
        /* we generate a block containing just the instruction
1282
           modifying the memory. It will ensure that it cannot modify
1283
           itself */
1284
        env->current_tb = NULL;
1285
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1286
        cpu_resume_from_signal(env, puc);
1287
    }
1288
#endif
1289
}
1290
#endif
1291

    
1292
/* add the tb in the target page and protect it if necessary */
1293
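/* Protection strategy: in user mode the whole host page is mprotect()ed
   read-only, so guest writes to it take a page fault (see the note below on
   the fault + mprotect overhead); in system mode tlb_protect_code() is
   called instead, and only for the first TB on the page -- later TBs find
   the page already protected.  */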
static inline void tb_alloc_page(TranslationBlock *tb,
1294
                                 unsigned int n, tb_page_addr_t page_addr)
1295
{
1296
    PageDesc *p;
1297
#ifndef CONFIG_USER_ONLY
1298
    bool page_already_protected;
1299
#endif
1300

    
1301
    tb->page_addr[n] = page_addr;
1302
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1303
    tb->page_next[n] = p->first_tb;
1304
#ifndef CONFIG_USER_ONLY
1305
    page_already_protected = p->first_tb != NULL;
1306
#endif
1307
    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1308
    invalidate_page_bitmap(p);
1309

    
1310
#if defined(TARGET_HAS_SMC) || 1
1311

    
1312
#if defined(CONFIG_USER_ONLY)
1313
    if (p->flags & PAGE_WRITE) {
1314
        target_ulong addr;
1315
        PageDesc *p2;
1316
        int prot;
1317

    
1318
        /* force the host page as non writable (writes will have a
1319
           page fault + mprotect overhead) */
1320
        page_addr &= qemu_host_page_mask;
1321
        prot = 0;
1322
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1323
            addr += TARGET_PAGE_SIZE) {
1324

    
1325
            p2 = page_find (addr >> TARGET_PAGE_BITS);
1326
            if (!p2)
1327
                continue;
1328
            prot |= p2->flags;
1329
            p2->flags &= ~PAGE_WRITE;
1330
          }
1331
        mprotect(g2h(page_addr), qemu_host_page_size,
1332
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
1333
#ifdef DEBUG_TB_INVALIDATE
1334
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1335
               page_addr);
1336
#endif
1337
    }
1338
#else
1339
    /* if some code is already present, then the pages are already
1340
       protected. So we handle the case where only the first TB is
1341
       allocated in a physical page */
1342
    if (!page_already_protected) {
1343
        tlb_protect_code(page_addr);
1344
    }
1345
#endif
1346

    
1347
#endif /* TARGET_HAS_SMC */
1348
}
1349

    
1350
/* add a new TB and link it to the physical page tables. phys_page2 is
1351
   (-1) to indicate that only one page contains the TB. */
1352
void tb_link_page(TranslationBlock *tb,
1353
                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1354
{
1355
    unsigned int h;
1356
    TranslationBlock **ptb;
1357

    
1358
    /* Grab the mmap lock to stop another thread invalidating this TB
1359
       before we are done.  */
1360
    mmap_lock();
1361
    /* add in the physical hash table */
1362
    h = tb_phys_hash_func(phys_pc);
1363
    ptb = &tb_phys_hash[h];
1364
    tb->phys_hash_next = *ptb;
1365
    *ptb = tb;
1366

    
1367
    /* add in the page list */
1368
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1369
    if (phys_page2 != -1)
1370
        tb_alloc_page(tb, 1, phys_page2);
1371
    else
1372
        tb->page_addr[1] = -1;
1373

    
1374
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1375
    tb->jmp_next[0] = NULL;
1376
    tb->jmp_next[1] = NULL;
1377

    
1378
    /* init original jump addresses */
1379
    if (tb->tb_next_offset[0] != 0xffff)
1380
        tb_reset_jump(tb, 0);
1381
    if (tb->tb_next_offset[1] != 0xffff)
1382
        tb_reset_jump(tb, 1);
1383

    
1384
#ifdef DEBUG_TB_CHECK
1385
    tb_page_check();
1386
#endif
1387
    mmap_unlock();
1388
}
1389

    
1390
/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1391
   tb[1].tc_ptr. Return NULL if not found */
1392
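/* The binary search relies on tbs[] being filled in order of increasing
   tc_ptr, which holds because code_gen_ptr only advances between flushes and
   each new TB takes the next slot in tbs[].  */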
TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1393
{
1394
    int m_min, m_max, m;
1395
    uintptr_t v;
1396
    TranslationBlock *tb;
1397

    
1398
    if (nb_tbs <= 0)
1399
        return NULL;
1400
    if (tc_ptr < (uintptr_t)code_gen_buffer ||
1401
        tc_ptr >= (uintptr_t)code_gen_ptr) {
1402
        return NULL;
1403
    }
1404
    /* binary search (cf Knuth) */
1405
    m_min = 0;
1406
    m_max = nb_tbs - 1;
1407
    while (m_min <= m_max) {
1408
        m = (m_min + m_max) >> 1;
1409
        tb = &tbs[m];
1410
        v = (uintptr_t)tb->tc_ptr;
1411
        if (v == tc_ptr)
1412
            return tb;
1413
        else if (tc_ptr < v) {
1414
            m_max = m - 1;
1415
        } else {
1416
            m_min = m + 1;
1417
        }
1418
    }
1419
    return &tbs[m_max];
1420
}
1421

    
1422
static void tb_reset_jump_recursive(TranslationBlock *tb);
1423

    
1424
static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1425
{
1426
    TranslationBlock *tb1, *tb_next, **ptb;
1427
    unsigned int n1;
1428

    
1429
    tb1 = tb->jmp_next[n];
1430
    if (tb1 != NULL) {
1431
        /* find head of list */
1432
        for(;;) {
1433
            n1 = (uintptr_t)tb1 & 3;
1434
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1435
            if (n1 == 2)
1436
                break;
1437
            tb1 = tb1->jmp_next[n1];
1438
        }
1439
        /* we are now sure that tb jumps to tb1 */
1440
        tb_next = tb1;
1441

    
1442
        /* remove tb from the jmp_first list */
1443
        ptb = &tb_next->jmp_first;
1444
        for(;;) {
1445
            tb1 = *ptb;
1446
            n1 = (uintptr_t)tb1 & 3;
1447
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1448
            if (n1 == n && tb1 == tb)
1449
                break;
1450
            ptb = &tb1->jmp_next[n1];
1451
        }
1452
        *ptb = tb->jmp_next[n];
1453
        tb->jmp_next[n] = NULL;
1454

    
1455
        /* suppress the jump to next tb in generated code */
1456
        tb_reset_jump(tb, n);
1457

    
1458
        /* suppress jumps in the tb on which we could have jumped */
1459
        tb_reset_jump_recursive(tb_next);
1460
    }
1461
}
1462

    
1463
static void tb_reset_jump_recursive(TranslationBlock *tb)
1464
{
1465
    tb_reset_jump_recursive2(tb, 0);
1466
    tb_reset_jump_recursive2(tb, 1);
1467
}
1468

    
1469
#if defined(TARGET_HAS_ICE)
1470
#if defined(CONFIG_USER_ONLY)
1471
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1472
{
1473
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
1474
}
1475
#else
1476
void tb_invalidate_phys_addr(hwaddr addr)
1477
{
1478
    ram_addr_t ram_addr;
1479
    MemoryRegionSection *section;
1480

    
1481
    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1482
    if (!(memory_region_is_ram(section->mr)
1483
          || (section->mr->rom_device && section->mr->readable))) {
1484
        return;
1485
    }
1486
    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1487
        + memory_region_section_addr(section, addr);
1488
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1489
}
1490

    
1491
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1492
{
1493
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1494
            (pc & ~TARGET_PAGE_MASK));
1495
}
1496
#endif
1497
#endif /* TARGET_HAS_ICE */
1498

    
1499
#if defined(CONFIG_USER_ONLY)
1500
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1501

    
1502
{
1503
}
1504

    
1505
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1506
                          int flags, CPUWatchpoint **watchpoint)
1507
{
1508
    return -ENOSYS;
1509
}
1510
#else
1511
/* Add a watchpoint.  */
1512
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1513
                          int flags, CPUWatchpoint **watchpoint)
1514
{
1515
    target_ulong len_mask = ~(len - 1);
1516
    CPUWatchpoint *wp;
1517

    
1518
    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1519
    if ((len & (len - 1)) || (addr & ~len_mask) ||
1520
            len == 0 || len > TARGET_PAGE_SIZE) {
1521
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1522
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1523
        return -EINVAL;
1524
    }
1525
    wp = g_malloc(sizeof(*wp));
1526

    
1527
    wp->vaddr = addr;
1528
    wp->len_mask = len_mask;
1529
    wp->flags = flags;
1530

    
1531
    /* keep all GDB-injected watchpoints in front */
1532
    if (flags & BP_GDB)
1533
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1534
    else
1535
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1536

    
1537
    tlb_flush_page(env, addr);
1538

    
1539
    if (watchpoint)
1540
        *watchpoint = wp;
1541
    return 0;
1542
}
1543

    
1544
/* Remove a specific watchpoint.  */
1545
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1546
                          int flags)
1547
{
1548
    target_ulong len_mask = ~(len - 1);
1549
    CPUWatchpoint *wp;
1550

    
1551
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1552
        if (addr == wp->vaddr && len_mask == wp->len_mask
1553
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1554
            cpu_watchpoint_remove_by_ref(env, wp);
1555
            return 0;
1556
        }
1557
    }
1558
    return -ENOENT;
1559
}
1560

    
1561
/* Remove a specific watchpoint by reference.  */
1562
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1563
{
1564
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1565

    
1566
    tlb_flush_page(env, watchpoint->vaddr);
1567

    
1568
    g_free(watchpoint);
1569
}
1570

    
1571
/* Remove all matching watchpoints.  */
1572
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1573
{
1574
    CPUWatchpoint *wp, *next;
1575

    
1576
    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1577
        if (wp->flags & mask)
1578
            cpu_watchpoint_remove_by_ref(env, wp);
1579
    }
1580
}
1581
#endif
1582

    
1583
/* Add a breakpoint.  */
1584
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1585
                          CPUBreakpoint **breakpoint)
1586
{
1587
#if defined(TARGET_HAS_ICE)
1588
    CPUBreakpoint *bp;
1589

    
1590
    bp = g_malloc(sizeof(*bp));
1591

    
1592
    bp->pc = pc;
1593
    bp->flags = flags;
1594

    
1595
    /* keep all GDB-injected breakpoints in front */
1596
    if (flags & BP_GDB)
1597
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1598
    else
1599
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1600

    
1601
    breakpoint_invalidate(env, pc);
1602

    
1603
    if (breakpoint)
1604
        *breakpoint = bp;
1605
    return 0;
1606
#else
1607
    return -ENOSYS;
1608
#endif
1609
}
1610

    
1611
/* Remove a specific breakpoint.  */
1612
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1613
{
1614
#if defined(TARGET_HAS_ICE)
1615
    CPUBreakpoint *bp;
1616

    
1617
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1618
        if (bp->pc == pc && bp->flags == flags) {
1619
            cpu_breakpoint_remove_by_ref(env, bp);
1620
            return 0;
1621
        }
1622
    }
1623
    return -ENOENT;
1624
#else
1625
    return -ENOSYS;
1626
#endif
1627
}
1628

    
1629
/* Remove a specific breakpoint by reference.  */
1630
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1631
{
1632
#if defined(TARGET_HAS_ICE)
1633
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1634

    
1635
    breakpoint_invalidate(env, breakpoint->pc);
1636

    
1637
    g_free(breakpoint);
1638
#endif
1639
}
1640

    
1641
/* Remove all matching breakpoints. */
1642
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1643
{
1644
#if defined(TARGET_HAS_ICE)
1645
    CPUBreakpoint *bp, *next;
1646

    
1647
    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1648
        if (bp->flags & mask)
1649
            cpu_breakpoint_remove_by_ref(env, bp);
1650
    }
1651
#endif
1652
}
1653

    
1654
/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUArchState *env, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (env->singlestep_enabled != enabled) {
        env->singlestep_enabled = enabled;
        if (kvm_enabled())
            kvm_update_guest_debug(env, 0);
        else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(env);
        }
    }
#endif
}

static void cpu_unlink_tb(CPUArchState *env)
{
    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
       problem and hope the cpu will stop of its own accord.  For userspace
       emulation this often isn't actually as bad as it sounds.  Often
       signals are used primarily to interrupt blocking syscalls.  */
    TranslationBlock *tb;
    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;

    spin_lock(&interrupt_lock);
    tb = env->current_tb;
    /* if the cpu is currently executing code, we must unlink it and
       all the potentially executing TB */
    if (tb) {
        env->current_tb = NULL;
        tb_reset_jump_recursive(tb);
    }
    spin_unlock(&interrupt_lock);
}

#ifndef CONFIG_USER_ONLY
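/* TCG interrupt delivery, roughly: if the caller is not the CPU's own thread
 * the CPU is just kicked; with icount, setting the high half of icount_decr
 * makes the next instruction-count check go negative so the current TB
 * returns to the main loop; otherwise the TB chain is broken with
 * cpu_unlink_tb() and execution stops at the next block boundary.
 */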
/* mask must never be zero, except for A20 change call */
static void tcg_handle_interrupt(CPUArchState *env, int mask)
{
    int old_mask;

    old_mask = env->interrupt_request;
    env->interrupt_request |= mask;

    /*
     * If called from iothread context, wake the target cpu in
     * case it's halted.
     */
    if (!qemu_cpu_is_self(env)) {
        qemu_cpu_kick(env);
        return;
    }

    if (use_icount) {
        env->icount_decr.u16.high = 0xffff;
        if (!can_do_io(env)
            && (mask & ~old_mask) != 0) {
            cpu_abort(env, "Raised interrupt while not in I/O function");
        }
    } else {
        cpu_unlink_tb(env);
    }
}

CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request |= mask;
    cpu_unlink_tb(env);
}
#endif /* CONFIG_USER_ONLY */

void cpu_reset_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request &= ~mask;
}

void cpu_exit(CPUArchState *env)
{
    env->exit_request = 1;
    cpu_unlink_tb(env);
}

void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

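/* Shallow-duplicate a CPU state.  This is typically reached from user-mode
 * emulation when the guest fork()s and the child needs its own CPUArchState;
 * the chaining pointer and cpu_index are saved and restored around the
 * memcpy() below because it would otherwise clobber them.
 */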
CPUArchState *cpu_copy(CPUArchState *env)
{
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
    CPUArchState *next_cpu = new_env->next_cpu;
    int cpu_index = new_env->cpu_index;
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;
    CPUWatchpoint *wp;
#endif

    memcpy(new_env, env, sizeof(CPUArchState));

    /* Preserve chaining and index. */
    new_env->next_cpu = next_cpu;
    new_env->cpu_index = cpu_index;

    /* Clone all break/watchpoints.
       Note: Once we support ptrace with hw-debug register access, make sure
       BP_CPU break/watchpoints are handled correctly on clone. */
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#if defined(TARGET_HAS_ICE)
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
    }
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
                              wp->flags, NULL);
    }
#endif

    return new_env;
}

#if !defined(CONFIG_USER_ONLY)
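/* A translated block can straddle a page boundary, so flushing the jump
 * cache for one page also clears the hash bucket of the page immediately
 * before it; hence the two memset() calls in the function below.
 */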
void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
{
    unsigned int i;

    /* Discard jump cache entries for any tb which might potentially
       overlap the flushed page.  */
    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
    memset(&env->tb_jmp_cache[i], 0,
           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));

    i = tb_jmp_cache_hash_page(addr);
    memset(&env->tb_jmp_cache[i], 0,
           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                      uintptr_t length)
{
    uintptr_t start1;

    /* we modify the TLB cache so that the dirty bit will be set again
       when accessing the range */
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
    /* Check that we don't span multiple blocks - this breaks the
       address comparisons below.  */
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
            != (end - 1) - start) {
        abort();
    }
    cpu_tlb_reset_dirty_all(start1, length);
}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                     int dirty_flags)
{
    uintptr_t length;

    start &= TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    length = end - start;
    if (length == 0)
        return;
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, end, length);
    }
}

int cpu_physical_memory_set_dirty_tracking(int enable)
{
    int ret = 0;
    in_migration = enable;
    return ret;
}

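/* Rough sketch of the iotlb encoding used below: for RAM-backed sections the
 * value is the page's ram_addr with a small section index (notdirty or rom)
 * OR'ed into the low bits, while for MMIO it is the index of the
 * MemoryRegionSection plus the offset of the access within that region.
 * Pages covered by a watchpoint are redirected to the watch section and
 * flagged TLB_MMIO so every access traps.
 */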
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                                   MemoryRegionSection *section,
                                                   target_ulong vaddr,
                                                   hwaddr paddr,
                                                   int prot,
                                                   target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, paddr);
        if (!section->readonly) {
            iotlb |= phys_section_notdirty;
        } else {
            iotlb |= phys_section_rom;
        }
    } else {
        /* IO handlers are currently passed a physical address.
           It would be nice to pass an offset from the base address
           of that region.  This would avoid having to special case RAM,
           and avoid full address decoding in every device.
           We can't use the high bits of pd for this because
           IO_MEM_ROMD uses these as a ram address.  */
        iotlb = section - phys_sections;
        iotlb += memory_region_section_addr(section, paddr);
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = phys_section_watch + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}

#else
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */

struct walk_memory_regions_data
{
    walk_memory_regions_fn fn;
    void *priv;
    uintptr_t start;
    int prot;
};

static int walk_memory_regions_end(struct walk_memory_regions_data *data,
                                   abi_ulong end, int new_prot)
{
    if (data->start != -1ul) {
        int rc = data->fn(data->priv, data->start, end, data->prot);
        if (rc != 0) {
            return rc;
        }
    }

    data->start = (new_prot ? end : -1ul);
    data->prot = new_prot;

    return 0;
}

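/* How the walk below proceeds, roughly: walk_memory_regions_1() recurses
 * through the l1_map radix tree, and at the leaf level it compares each
 * page's protection flags against the run currently being accumulated.
 * Whenever the flags change it closes the run via walk_memory_regions_end(),
 * which is what coalesces adjacent pages with identical protections into a
 * single callback invocation.
 */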
static int walk_memory_regions_1(struct walk_memory_regions_data *data,
                                 abi_ulong base, int level, void **lp)
{
    abi_ulong pa;
    int i, rc;

    if (*lp == NULL) {
        return walk_memory_regions_end(data, base, 0);
    }

    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            int prot = pd[i].flags;

            pa = base | (i << TARGET_PAGE_BITS);
            if (prot != data->prot) {
                rc = walk_memory_regions_end(data, pa, prot);
                if (rc != 0) {
                    return rc;
                }
            }
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pa = base | ((abi_ulong)i <<
                (TARGET_PAGE_BITS + L2_BITS * level));
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
            if (rc != 0) {
                return rc;
            }
        }
    }

    return 0;
}

int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    struct walk_memory_regions_data data;
    uintptr_t i;

    data.fn = fn;
    data.priv = priv;
    data.start = -1ul;
    data.prot = 0;

    for (i = 0; i < V_L1_SIZE; i++) {
        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
        if (rc != 0) {
            return rc;
        }
    }

    return walk_memory_regions_end(&data, 0, 0);
}

static int dump_region(void *priv, abi_ulong start,
    abi_ulong end, unsigned long prot)
{
    FILE *f = (FILE *)priv;

    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
        " "TARGET_ABI_FMT_lx" %c%c%c\n",
        start, end, end - start,
        ((prot & PAGE_READ) ? 'r' : '-'),
        ((prot & PAGE_WRITE) ? 'w' : '-'),
        ((prot & PAGE_EXEC) ? 'x' : '-'));

    return (0);
}

/* dump memory mappings */
void page_dump(FILE *f)
{
    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
            "start", "end", "size", "prot");
    walk_memory_regions(f, dump_region);
}

int page_get_flags(target_ulong address)
{
    PageDesc *p;

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p)
        return 0;
    return p->flags;
}

/* Modify the flags of a page and invalidate the code if necessary.
   The flag PAGE_WRITE_ORG is positioned automatically depending
   on PAGE_WRITE.  The mmap_lock should already be held.  */
void page_set_flags(target_ulong start, target_ulong end, int flags)
{
    target_ulong addr, len;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif
    assert(start < end);

    start = start & TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    if (flags & PAGE_WRITE) {
        flags |= PAGE_WRITE_ORG;
    }

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);

        /* If the write protection bit is set, then we invalidate
           the code inside.  */
        if (!(p->flags & PAGE_WRITE) &&
            (flags & PAGE_WRITE) &&
            p->first_tb) {
            tb_invalidate_phys_page(addr, 0, NULL);
        }
        p->flags = flags;
    }
}

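/* A minimal usage sketch (hypothetical caller, not taken from this file):
 *
 *     if (page_check_range(guest_addr, size, PAGE_READ | PAGE_WRITE) < 0) {
 *         return -TARGET_EFAULT;
 *     }
 *
 * A negative return means some page in the range is unmapped or lacks the
 * requested permissions; a zero return means the access may proceed.
 */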
int page_check_range(target_ulong start, target_ulong len, int flags)
{
    PageDesc *p;
    target_ulong end;
    target_ulong addr;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif

    if (len == 0) {
        return 0;
    }
    if (start + len - 1 < start) {
        /* We've wrapped around.  */
        return -1;
    }

    end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
    start = start & TARGET_PAGE_MASK;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        p = page_find(addr >> TARGET_PAGE_BITS);
        if (!p)
            return -1;
        if (!(p->flags & PAGE_VALID))
            return -1;

        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
            return -1;
        if (flags & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG))
                return -1;
            /* unprotect the page if it was put read-only because it
               contains translated code */
            if (!(p->flags & PAGE_WRITE)) {
                if (!page_unprotect(addr, 0, NULL))
                    return -1;
            }
            return 0;
        }
    }
    return 0;
}

/* called from signal handler: invalidate the code and unprotect the
   page. Return TRUE if the fault was successfully handled. */
int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
{
    unsigned int prot;
    PageDesc *p;
    target_ulong host_start, host_end, addr;

    /* Technically this isn't safe inside a signal handler.  However we
       know this only ever happens in a synchronous SEGV handler, so in
       practice it seems to be ok.  */
    mmap_lock();

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        mmap_unlock();
        return 0;
    }

    /* if the page was really writable, then we change its
       protection back to writable */
    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
        host_start = address & qemu_host_page_mask;
        host_end = host_start + qemu_host_page_size;

        prot = 0;
        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
            p = page_find(addr >> TARGET_PAGE_BITS);
            p->flags |= PAGE_WRITE;
            prot |= p->flags;

            /* and since the content will be modified, we must invalidate
               the corresponding translated code. */
            tb_invalidate_phys_page(addr, pc, puc);
#ifdef DEBUG_TB_CHECK
            tb_invalidate_check(addr);
#endif
        }
        mprotect((void *)g2h(host_start), qemu_host_page_size,
                 prot & PAGE_BITS);

        mmap_unlock();
        return 1;
    }
    mmap_unlock();
    return 0;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(hwaddr base);
static void destroy_page_desc(uint16_t section_index)
{
    MemoryRegionSection *section = &phys_sections[section_index];
    MemoryRegion *mr = section->mr;

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}

static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
{
    unsigned i;
    PhysPageEntry *p;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = phys_map_nodes[lp->ptr];
    for (i = 0; i < L2_SIZE; ++i) {
        if (!p[i].is_leaf) {
            destroy_l2_mapping(&p[i], level - 1);
        } else {
            destroy_page_desc(p[i].ptr);
        }
    }
    lp->is_leaf = 0;
    lp->ptr = PHYS_MAP_NODE_NIL;
}

static void destroy_all_mappings(AddressSpaceDispatch *d)
{
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    phys_map_nodes_reset();
}

static uint16_t phys_section_add(MemoryRegionSection *section)
{
    if (phys_sections_nb == phys_sections_nb_alloc) {
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
                                phys_sections_nb_alloc);
    }
    phys_sections[phys_sections_nb] = *section;
    return phys_sections_nb++;
}

static void phys_sections_clear(void)
{
    phys_sections_nb = 0;
}

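/* Background for the helpers below (informal summary): sections smaller than
 * TARGET_PAGE_SIZE, or not page-aligned, cannot be entered directly into the
 * per-page phys map.  They are routed through a subpage_t whose sub_section[]
 * table redispatches each offset within the page to the right
 * MemoryRegionSection; fully aligned pages are installed in bulk by
 * register_multipage().
 */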
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = TARGET_PAGE_SIZE,
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(base);
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + section->size - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
}


static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = section->size;
    hwaddr addr;
    uint16_t section_index = phys_section_add(section);

    assert(size);

    addr = start_addr;
    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
                  section_index);
}

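/* mem_add() splits an incoming MemoryRegionSection into up to three pieces:
 * an unaligned head and a sub-page tail handled by register_subpage(), and a
 * page-aligned middle installed with register_multipage().  (Informal summary
 * of the loop below.)
 */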
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
    MemoryRegionSection now = *section, remain = *section;

    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
        || (now.size < TARGET_PAGE_SIZE)) {
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space,
                       now.size);
        register_subpage(d, &now);
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    while (remain.size >= TARGET_PAGE_SIZE) {
        now = remain;
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
            now.size = TARGET_PAGE_SIZE;
            register_subpage(d, &now);
        } else {
            now.size &= TARGET_PAGE_MASK;
            register_multipage(d, &now);
        }
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    now = remain;
    if (now.size) {
        register_subpage(d, &now);
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

#if defined(__linux__) && !defined(TARGET_S390X)
2324

    
2325
#include <sys/vfs.h>
2326

    
2327
#define HUGETLBFS_MAGIC       0x958458f6
2328

    
2329
static long gethugepagesize(const char *path)
2330
{
2331
    struct statfs fs;
2332
    int ret;
2333

    
2334
    do {
2335
        ret = statfs(path, &fs);
2336
    } while (ret != 0 && errno == EINTR);
2337

    
2338
    if (ret != 0) {
2339
        perror(path);
2340
        return 0;
2341
    }
2342

    
2343
    if (fs.f_type != HUGETLBFS_MAGIC)
2344
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2345

    
2346
    return fs.f_bsize;
2347
}
2348

    
2349
static void *file_ram_alloc(RAMBlock *block,
2350
                            ram_addr_t memory,
2351
                            const char *path)
2352
{
2353
    char *filename;
2354
    void *area;
2355
    int fd;
2356
#ifdef MAP_POPULATE
2357
    int flags;
2358
#endif
2359
    unsigned long hpagesize;
2360

    
2361
    hpagesize = gethugepagesize(path);
2362
    if (!hpagesize) {
2363
        return NULL;
2364
    }
2365

    
2366
    if (memory < hpagesize) {
2367
        return NULL;
2368
    }
2369

    
2370
    if (kvm_enabled() && !kvm_has_sync_mmu()) {
2371
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2372
        return NULL;
2373
    }
2374

    
2375
    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2376
        return NULL;
2377
    }
2378

    
2379
    fd = mkstemp(filename);
2380
    if (fd < 0) {
2381
        perror("unable to create backing store for hugepages");
2382
        free(filename);
2383
        return NULL;
2384
    }
2385
    unlink(filename);
2386
    free(filename);
2387

    
2388
    memory = (memory+hpagesize-1) & ~(hpagesize-1);
2389

    
2390
    /*
2391
     * ftruncate is not supported by hugetlbfs in older
2392
     * hosts, so don't bother bailing out on errors.
2393
     * If anything goes wrong with it under other filesystems,
2394
     * mmap will fail.
2395
     */
2396
    if (ftruncate(fd, memory))
2397
        perror("ftruncate");
2398

    
2399
#ifdef MAP_POPULATE
2400
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2401
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
2402
     * to sidestep this quirk.
2403
     */
2404
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2405
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2406
#else
2407
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2408
#endif
2409
    if (area == MAP_FAILED) {
2410
        perror("file_ram_alloc: can't mmap RAM pages");
2411
        close(fd);
2412
        return (NULL);
2413
    }
2414
    block->fd = fd;
2415
    return area;
2416
}
2417
#endif
2418

    
2419
static ram_addr_t find_ram_offset(ram_addr_t size)
2420
{
2421
    RAMBlock *block, *next_block;
2422
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2423

    
2424
    if (QLIST_EMPTY(&ram_list.blocks))
2425
        return 0;
2426

    
2427
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2428
        ram_addr_t end, next = RAM_ADDR_MAX;
2429

    
2430
        end = block->offset + block->length;
2431

    
2432
        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2433
            if (next_block->offset >= end) {
2434
                next = MIN(next, next_block->offset);
2435
            }
2436
        }
2437
        if (next - end >= size && next - end < mingap) {
2438
            offset = end;
2439
            mingap = next - end;
2440
        }
2441
    }
2442

    
2443
    if (offset == RAM_ADDR_MAX) {
2444
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2445
                (uint64_t)size);
2446
        abort();
2447
    }
2448

    
2449
    return offset;
2450
}
2451

    
2452
ram_addr_t last_ram_offset(void)
2453
{
2454
    RAMBlock *block;
2455
    ram_addr_t last = 0;
2456

    
2457
    QLIST_FOREACH(block, &ram_list.blocks, next)
2458
        last = MAX(last, block->offset + block->length);
2459

    
2460
    return last;
2461
}
2462

    
2463
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2464
{
2465
    int ret;
2466
    QemuOpts *machine_opts;
2467

    
2468
    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2469
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2470
    if (machine_opts &&
2471
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2472
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2473
        if (ret) {
2474
            perror("qemu_madvise");
2475
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2476
                            "but dump_guest_core=off specified\n");
2477
        }
2478
    }
2479
}
2480

    
2481
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2482
{
2483
    RAMBlock *new_block, *block;
2484

    
2485
    new_block = NULL;
2486
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2487
        if (block->offset == addr) {
2488
            new_block = block;
2489
            break;
2490
        }
2491
    }
2492
    assert(new_block);
2493
    assert(!new_block->idstr[0]);
2494

    
2495
    if (dev) {
2496
        char *id = qdev_get_dev_path(dev);
2497
        if (id) {
2498
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2499
            g_free(id);
2500
        }
2501
    }
2502
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2503

    
2504
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2505
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2506
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2507
                    new_block->idstr);
2508
            abort();
2509
        }
2510
    }
2511
}
2512

    
2513
static int memory_try_enable_merging(void *addr, size_t len)
2514
{
2515
    QemuOpts *opts;
2516

    
2517
    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2518
    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2519
        /* disabled by the user */
2520
        return 0;
2521
    }
2522

    
2523
    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2524
}
2525

    
2526
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2527
                                   MemoryRegion *mr)
2528
{
2529
    RAMBlock *new_block;
2530

    
2531
    size = TARGET_PAGE_ALIGN(size);
2532
    new_block = g_malloc0(sizeof(*new_block));
2533

    
2534
    new_block->mr = mr;
2535
    new_block->offset = find_ram_offset(size);
2536
    if (host) {
2537
        new_block->host = host;
2538
        new_block->flags |= RAM_PREALLOC_MASK;
2539
    } else {
2540
        if (mem_path) {
2541
#if defined (__linux__) && !defined(TARGET_S390X)
2542
            new_block->host = file_ram_alloc(new_block, size, mem_path);
2543
            if (!new_block->host) {
2544
                new_block->host = qemu_vmalloc(size);
2545
                memory_try_enable_merging(new_block->host, size);
2546
            }
2547
#else
2548
            fprintf(stderr, "-mem-path option unsupported\n");
2549
            exit(1);
2550
#endif
2551
        } else {
2552
            if (xen_enabled()) {
2553
                xen_ram_alloc(new_block->offset, size, mr);
2554
            } else if (kvm_enabled()) {
2555
                /* some s390/kvm configurations have special constraints */
2556
                new_block->host = kvm_vmalloc(size);
2557
            } else {
2558
                new_block->host = qemu_vmalloc(size);
2559
            }
2560
            memory_try_enable_merging(new_block->host, size);
2561
        }
2562
    }
2563
    new_block->length = size;
2564

    
2565
    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2566

    
2567
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2568
                                       last_ram_offset() >> TARGET_PAGE_BITS);
2569
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2570
           0, size >> TARGET_PAGE_BITS);
2571
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2572

    
2573
    qemu_ram_setup_dump(new_block->host, size);
2574
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2575

    
2576
    if (kvm_enabled())
2577
        kvm_setup_guest_memory(new_block->host, size);
2578

    
2579
    return new_block->offset;
2580
}
2581

    
2582
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2583
{
2584
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
2585
}
2586

    
2587
void qemu_ram_free_from_ptr(ram_addr_t addr)
2588
{
2589
    RAMBlock *block;
2590

    
2591
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2592
        if (addr == block->offset) {
2593
            QLIST_REMOVE(block, next);
2594
            g_free(block);
2595
            return;
2596
        }
2597
    }
2598
}
2599

    
2600
void qemu_ram_free(ram_addr_t addr)
2601
{
2602
    RAMBlock *block;
2603

    
2604
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2605
        if (addr == block->offset) {
2606
            QLIST_REMOVE(block, next);
2607
            if (block->flags & RAM_PREALLOC_MASK) {
2608
                ;
2609
            } else if (mem_path) {
2610
#if defined (__linux__) && !defined(TARGET_S390X)
2611
                if (block->fd) {
2612
                    munmap(block->host, block->length);
2613
                    close(block->fd);
2614
                } else {
2615
                    qemu_vfree(block->host);
2616
                }
2617
#else
2618
                abort();
2619
#endif
2620
            } else {
2621
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2622
                munmap(block->host, block->length);
2623
#else
2624
                if (xen_enabled()) {
2625
                    xen_invalidate_map_cache_entry(block->host);
2626
                } else {
2627
                    qemu_vfree(block->host);
2628
                }
2629
#endif
2630
            }
2631
            g_free(block);
2632
            return;
2633
        }
2634
    }
2635

    
2636
}
2637

    
2638
#ifndef _WIN32
2639
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2640
{
2641
    RAMBlock *block;
2642
    ram_addr_t offset;
2643
    int flags;
2644
    void *area, *vaddr;
2645

    
2646
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2647
        offset = addr - block->offset;
2648
        if (offset < block->length) {
2649
            vaddr = block->host + offset;
2650
            if (block->flags & RAM_PREALLOC_MASK) {
2651
                ;
2652
            } else {
2653
                flags = MAP_FIXED;
2654
                munmap(vaddr, length);
2655
                if (mem_path) {
2656
#if defined(__linux__) && !defined(TARGET_S390X)
2657
                    if (block->fd) {
2658
#ifdef MAP_POPULATE
2659
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2660
                            MAP_PRIVATE;
2661
#else
2662
                        flags |= MAP_PRIVATE;
2663
#endif
2664
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2665
                                    flags, block->fd, offset);
2666
                    } else {
2667
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2668
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2669
                                    flags, -1, 0);
2670
                    }
2671
#else
2672
                    abort();
2673
#endif
2674
                } else {
2675
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2676
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
2677
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2678
                                flags, -1, 0);
2679
#else
2680
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2681
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2682
                                flags, -1, 0);
2683
#endif
2684
                }
2685
                if (area != vaddr) {
2686
                    fprintf(stderr, "Could not remap addr: "
2687
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2688
                            length, addr);
2689
                    exit(1);
2690
                }
2691
                memory_try_enable_merging(vaddr, length);
2692
                qemu_ram_setup_dump(vaddr, length);
2693
            }
2694
            return;
2695
        }
2696
    }
2697
}
2698
#endif /* !_WIN32 */
2699

    
2700
/* Return a host pointer to ram allocated with qemu_ram_alloc.
2701
   With the exception of the softmmu code in this file, this should
2702
   only be used for local memory (e.g. video ram) that the device owns,
2703
   and knows it isn't going to access beyond the end of the block.
2704

2705
   It should not be used for general purpose DMA.
2706
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2707
 */
2708
void *qemu_get_ram_ptr(ram_addr_t addr)
2709
{
2710
    RAMBlock *block;
2711

    
2712
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2713
        if (addr - block->offset < block->length) {
2714
            /* Move this entry to the start of the list.  */
2715
            if (block != QLIST_FIRST(&ram_list.blocks)) {
2716
                QLIST_REMOVE(block, next);
2717
                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2718
            }
2719
            if (xen_enabled()) {
2720
                /* We need to check if the requested address is in the RAM
2721
                 * because we don't want to map the entire memory in QEMU.
2722
                 * In that case just map until the end of the page.
2723
                 */
2724
                if (block->offset == 0) {
2725
                    return xen_map_cache(addr, 0, 0);
2726
                } else if (block->host == NULL) {
2727
                    block->host =
2728
                        xen_map_cache(block->offset, block->length, 1);
2729
                }
2730
            }
2731
            return block->host + (addr - block->offset);
2732
        }
2733
    }
2734

    
2735
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2736
    abort();
2737

    
2738
    return NULL;
2739
}
2740

    
2741
/* Return a host pointer to ram allocated with qemu_ram_alloc.
2742
 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2743
 */
2744
void *qemu_safe_ram_ptr(ram_addr_t addr)
2745
{
2746
    RAMBlock *block;
2747

    
2748
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2749
        if (addr - block->offset < block->length) {
2750
            if (xen_enabled()) {
2751
                /* We need to check if the requested address is in the RAM
2752
                 * because we don't want to map the entire memory in QEMU.
2753
                 * In that case just map until the end of the page.
2754
                 */
2755
                if (block->offset == 0) {
2756
                    return xen_map_cache(addr, 0, 0);
2757
                } else if (block->host == NULL) {
2758
                    block->host =
2759
                        xen_map_cache(block->offset, block->length, 1);
2760
                }
2761
            }
2762
            return block->host + (addr - block->offset);
2763
        }
2764
    }
2765

    
2766
    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2767
    abort();
2768

    
2769
    return NULL;
2770
}
2771

    
2772
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2773
 * but takes a size argument */
2774
void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2775
{
2776
    if (*size == 0) {
2777
        return NULL;
2778
    }
2779
    if (xen_enabled()) {
2780
        return xen_map_cache(addr, *size, 1);
2781
    } else {
2782
        RAMBlock *block;
2783

    
2784
        QLIST_FOREACH(block, &ram_list.blocks, next) {
2785
            if (addr - block->offset < block->length) {
2786
                if (addr - block->offset + *size > block->length)
2787
                    *size = block->length - addr + block->offset;
2788
                return block->host + (addr - block->offset);
2789
            }
2790
        }
2791

    
2792
        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2793
        abort();
2794
    }
2795
}
2796

    
2797
void qemu_put_ram_ptr(void *addr)
2798
{
2799
    trace_qemu_put_ram_ptr(addr);
2800
}
2801

    
2802
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2803
{
2804
    RAMBlock *block;
2805
    uint8_t *host = ptr;
2806

    
2807
    if (xen_enabled()) {
2808
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
2809
        return 0;
2810
    }
2811

    
2812
    QLIST_FOREACH(block, &ram_list.blocks, next) {
2813
        /* This case appears when the block is not mapped. */
2814
        if (block->host == NULL) {
2815
            continue;
2816
        }
2817
        if (host - block->host < block->length) {
2818
            *ram_addr = block->offset + (host - block->host);
2819
            return 0;
2820
        }
2821
    }
2822

    
2823
    return -1;
2824
}
2825

    
2826
/* Some of the softmmu routines need to translate from a host pointer
2827
   (typically a TLB entry) back to a ram offset.  */
2828
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2829
{
2830
    ram_addr_t ram_addr;
2831

    
2832
    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2833
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
2834
        abort();
2835
    }
2836
    return ram_addr;
2837
}
2838

    
2839
static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2840
                                    unsigned size)
2841
{
2842
#ifdef DEBUG_UNASSIGNED
2843
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2844
#endif
2845
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2846
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2847
#endif
2848
    return 0;
2849
}
2850

    
2851
static void unassigned_mem_write(void *opaque, hwaddr addr,
2852
                                 uint64_t val, unsigned size)
2853
{
2854
#ifdef DEBUG_UNASSIGNED
2855
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2856
#endif
2857
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2858
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2859
#endif
2860
}
2861

    
2862
static const MemoryRegionOps unassigned_mem_ops = {
2863
    .read = unassigned_mem_read,
2864
    .write = unassigned_mem_write,
2865
    .endianness = DEVICE_NATIVE_ENDIAN,
2866
};
2867

    
2868
static uint64_t error_mem_read(void *opaque, hwaddr addr,
2869
                               unsigned size)
2870
{
2871
    abort();
2872
}
2873

    
2874
static void error_mem_write(void *opaque, hwaddr addr,
2875
                            uint64_t value, unsigned size)
2876
{
2877
    abort();
2878
}
2879

    
2880
static const MemoryRegionOps error_mem_ops = {
2881
    .read = error_mem_read,
2882
    .write = error_mem_write,
2883
    .endianness = DEVICE_NATIVE_ENDIAN,
2884
};
2885

    
2886
static const MemoryRegionOps rom_mem_ops = {
2887
    .read = error_mem_read,
2888
    .write = unassigned_mem_write,
2889
    .endianness = DEVICE_NATIVE_ENDIAN,
2890
};
2891

    
2892
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2893
                               uint64_t val, unsigned size)
2894
{
2895
    int dirty_flags;
2896
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2897
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2898
#if !defined(CONFIG_USER_ONLY)
2899
        tb_invalidate_phys_page_fast(ram_addr, size);
2900
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2901
#endif
2902
    }
2903
    switch (size) {
2904
    case 1:
2905
        stb_p(qemu_get_ram_ptr(ram_addr), val);
2906
        break;
2907
    case 2:
2908
        stw_p(qemu_get_ram_ptr(ram_addr), val);
2909
        break;
2910
    case 4:
2911
        stl_p(qemu_get_ram_ptr(ram_addr), val);
2912
        break;
2913
    default:
2914
        abort();
2915
    }
2916
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2917
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2918
    /* we remove the notdirty callback only if the code has been
2919
       flushed */
2920
    if (dirty_flags == 0xff)
2921
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2922
}
2923

    
2924
static const MemoryRegionOps notdirty_mem_ops = {
2925
    .read = error_mem_read,
2926
    .write = notdirty_mem_write,
2927
    .endianness = DEVICE_NATIVE_ENDIAN,
2928
};
2929

    
2930
/* Generate a debug exception if a watchpoint has been hit.  */
2931
static void check_watchpoint(int offset, int len_mask, int flags)
2932
{
2933
    CPUArchState *env = cpu_single_env;
2934
    target_ulong pc, cs_base;
2935
    TranslationBlock *tb;
2936
    target_ulong vaddr;
2937
    CPUWatchpoint *wp;
2938
    int cpu_flags;
2939

    
2940
    if (env->watchpoint_hit) {
2941
        /* We re-entered the check after replacing the TB. Now raise
2942
         * the debug interrupt so that it will trigger after the
2943
         * current instruction. */
2944
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2945
        return;
2946
    }
2947
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2948
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2949
        if ((vaddr == (wp->vaddr & len_mask) ||
2950
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2951
            wp->flags |= BP_WATCHPOINT_HIT;
2952
            if (!env->watchpoint_hit) {
2953
                env->watchpoint_hit = wp;
2954
                tb = tb_find_pc(env->mem_io_pc);
2955
                if (!tb) {
2956
                    cpu_abort(env, "check_watchpoint: could not find TB for "
2957
                              "pc=%p", (void *)env->mem_io_pc);
2958
                }
2959
                cpu_restore_state(tb, env, env->mem_io_pc);
2960
                tb_phys_invalidate(tb, -1);
2961
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2962
                    env->exception_index = EXCP_DEBUG;
2963
                    cpu_loop_exit(env);
2964
                } else {
2965
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2966
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2967
                    cpu_resume_from_signal(env, NULL);
2968
                }
2969
            }
2970
        } else {
2971
            wp->flags &= ~BP_WATCHPOINT_HIT;
2972
        }
2973
    }
2974
}
2975

    
2976
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
2977
   so these check for a hit then pass through to the normal out-of-line
2978
   phys routines.  */
2979
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2980
                               unsigned size)
2981
{
2982
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2983
    switch (size) {
2984
    case 1: return ldub_phys(addr);
2985
    case 2: return lduw_phys(addr);
2986
    case 4: return ldl_phys(addr);
2987
    default: abort();
2988
    }
2989
}
2990

    
2991
static void watch_mem_write(void *opaque, hwaddr addr,
2992
                            uint64_t val, unsigned size)
2993
{
2994
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2995
    switch (size) {
2996
    case 1:
2997
        stb_phys(addr, val);
2998
        break;
2999
    case 2:
3000
        stw_phys(addr, val);
3001
        break;
3002
    case 4:
3003
        stl_phys(addr, val);
3004
        break;
3005
    default: abort();
3006
    }
3007
}
3008

    
3009
static const MemoryRegionOps watch_mem_ops = {
3010
    .read = watch_mem_read,
3011
    .write = watch_mem_write,
3012
    .endianness = DEVICE_NATIVE_ENDIAN,
3013
};
3014

    
3015
static uint64_t subpage_read(void *opaque, hwaddr addr,
3016
                             unsigned len)
3017
{
3018
    subpage_t *mmio = opaque;
3019
    unsigned int idx = SUBPAGE_IDX(addr);
3020
    MemoryRegionSection *section;
3021
#if defined(DEBUG_SUBPAGE)
3022
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3023
           mmio, len, addr, idx);
3024
#endif
3025

    
3026
    section = &phys_sections[mmio->sub_section[idx]];
3027
    addr += mmio->base;
3028
    addr -= section->offset_within_address_space;
3029
    addr += section->offset_within_region;
3030
    return io_mem_read(section->mr, addr, len);
3031
}
3032

    
3033
static void subpage_write(void *opaque, hwaddr addr,
3034
                          uint64_t value, unsigned len)
3035
{
3036
    subpage_t *mmio = opaque;
3037
    unsigned int idx = SUBPAGE_IDX(addr);
3038
    MemoryRegionSection *section;
3039
#if defined(DEBUG_SUBPAGE)
3040
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3041
           " idx %d value %"PRIx64"\n",
3042
           __func__, mmio, len, addr, idx, value);
3043
#endif
3044

    
3045
    section = &phys_sections[mmio->sub_section[idx]];
3046
    addr += mmio->base;
3047
    addr -= section->offset_within_address_space;
3048
    addr += section->offset_within_region;
3049
    io_mem_write(section->mr, addr, value, len);
3050
}
3051

    
3052
static const MemoryRegionOps subpage_ops = {
3053
    .read = subpage_read,
3054
    .write = subpage_write,
3055
    .endianness = DEVICE_NATIVE_ENDIAN,
3056
};
3057

    
3058
static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3059
                                 unsigned size)
3060
{
3061
    ram_addr_t raddr = addr;
3062
    void *ptr = qemu_get_ram_ptr(raddr);
3063
    switch (size) {
3064
    case 1: return ldub_p(ptr);
3065
    case 2: return lduw_p(ptr);
3066
    case 4: return ldl_p(ptr);
3067
    default: abort();
3068
    }
3069
}
3070

    
3071
static void subpage_ram_write(void *opaque, hwaddr addr,
3072
                              uint64_t value, unsigned size)
3073
{
3074
    ram_addr_t raddr = addr;
3075
    void *ptr = qemu_get_ram_ptr(raddr);
3076
    switch (size) {
3077
    case 1: return stb_p(ptr, value);
3078
    case 2: return stw_p(ptr, value);
3079
    case 4: return stl_p(ptr, value);
3080
    default: abort();
3081
    }
3082
}
3083

    
3084
static const MemoryRegionOps subpage_ram_ops = {
3085
    .read = subpage_ram_read,
3086
    .write = subpage_ram_write,
3087
    .endianness = DEVICE_NATIVE_ENDIAN,
3088
};
3089

    
3090
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3091
                             uint16_t section)
3092
{
3093
    int idx, eidx;
3094

    
3095
    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3096
        return -1;
3097
    idx = SUBPAGE_IDX(start);
3098
    eidx = SUBPAGE_IDX(end);
3099
#if defined(DEBUG_SUBPAGE)
3100
    printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3101
           mmio, start, end, idx, eidx, memory);
3102
#endif
3103
    if (memory_region_is_ram(phys_sections[section].mr)) {
3104
        MemoryRegionSection new_section = phys_sections[section];
3105
        new_section.mr = &io_mem_subpage_ram;
3106
        section = phys_section_add(&new_section);
3107
    }
3108
    for (; idx <= eidx; idx++) {
3109
        mmio->sub_section[idx] = section;
3110
    }
3111

    
3112
    return 0;
3113
}
3114

    
3115
static subpage_t *subpage_init(hwaddr base)
3116
{
3117
    subpage_t *mmio;
3118

    
3119
    mmio = g_malloc0(sizeof(subpage_t));
3120

    
3121
    mmio->base = base;
3122
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3123
                          "subpage", TARGET_PAGE_SIZE);
3124
    mmio->iomem.subpage = true;
3125
#if defined(DEBUG_SUBPAGE)
3126
    printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3127
           mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3128
#endif
3129
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3130

    
3131
    return mmio;
3132
}
3133

    
3134
static uint16_t dummy_section(MemoryRegion *mr)
3135
{
3136
    MemoryRegionSection section = {
3137
        .mr = mr,
3138
        .offset_within_address_space = 0,
3139
        .offset_within_region = 0,
3140
        .size = UINT64_MAX,
3141
    };
3142

    
3143
    return phys_section_add(&section);
3144
}
3145

    
3146
MemoryRegion *iotlb_to_region(hwaddr index)
3147
{
3148
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3149
}
3150

    
3151
static void io_mem_init(void)
3152
{
3153
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3154
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3155
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3156
                          "unassigned", UINT64_MAX);
3157
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3158
                          "notdirty", UINT64_MAX);
3159
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3160
                          "subpage-ram", UINT64_MAX);
3161
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3162
                          "watch", UINT64_MAX);
3163
}
3164

    
3165
static void mem_begin(MemoryListener *listener)
3166
{
3167
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3168

    
3169
    destroy_all_mappings(d);
3170
    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3171
}
3172

    
3173
static void core_begin(MemoryListener *listener)
3174
{
3175
    phys_sections_clear();
3176
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
3177
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
3178
    phys_section_rom = dummy_section(&io_mem_rom);
3179
    phys_section_watch = dummy_section(&io_mem_watch);
3180
}
3181

    
3182
static void tcg_commit(MemoryListener *listener)
3183
{
3184
    CPUArchState *env;
3185

    
3186
    /* since each CPU stores ram addresses in its TLB cache, we must
3187
       reset the modified entries */
3188
    /* XXX: slow ! */
3189
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
3190
        tlb_flush(env, 1);
3191
    }
3192
}
3193

    
static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener io_memory_listener = {
    .region_add = io_region_add,
    .region_del = io_region_del,
    .priority = 0,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

void address_space_init_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
    d->listener = (MemoryListener) {
        .begin = mem_begin,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    as->dispatch = d;
    memory_listener_register(&d->listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&d->listener);
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    g_free(d);
    as->dispatch = NULL;
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */

/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else

static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    AddressSpaceDispatch *d = as->dispatch;
    int l;
    uint8_t *ptr;
    uint32_t val;
    hwaddr page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(d, page >> TARGET_PAGE_BITS);

        if (is_write) {
            if (!memory_region_is_ram(section->mr)) {
                hwaddr addr1;
                addr1 = memory_region_section_addr(section, addr);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                    l = 2;
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                    l = 1;
                }
            } else if (!section->readonly) {
                ram_addr_t addr1;
                addr1 = memory_region_get_ram_addr(section->mr)
                    + memory_region_section_addr(section, addr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
                qemu_put_ram_ptr(ptr);
            }
        } else {
            if (!(memory_region_is_ram(section->mr) ||
                  memory_region_is_romd(section->mr))) {
                hwaddr addr1;
                /* I/O case */
                addr1 = memory_region_section_addr(section, addr);
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                    l = 2;
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                    l = 1;
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
                                       + memory_region_section_addr(section,
                                                                    addr));
                memcpy(buf, ptr, l);
                qemu_put_ram_ptr(ptr);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

void address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

/**
 * address_space_read: read from an address space.
 *
 * @as: #AddressSpace to be accessed
 * @addr: address within that address space
 * @buf: buffer with the data transferred
 * @len: length of the data transferred
 */
void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    address_space_rw(as, addr, buf, len, false);
}


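/*
 * Usage sketch (illustrative only): a device model that needs to copy a few
 * bytes to and from guest physical memory can go through the system address
 * space with the two wrappers above.  The buffer name and guest address used
 * here are hypothetical.
 *
 *     uint8_t desc[16];
 *     hwaddr desc_gpa = 0x1000;    // example guest-physical address
 *
 *     address_space_read(&address_space_memory, desc_gpa, desc, sizeof(desc));
 *     // ... modify desc ...
 *     address_space_write(&address_space_memory, desc_gpa, desc, sizeof(desc));
 */
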
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    return address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    AddressSpaceDispatch *d = address_space_memory.dispatch;
    int l;
    uint8_t *ptr;
    hwaddr page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(d, page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr))) {
            /* do nothing */
        } else {
            unsigned long addr1;
            addr1 = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
            qemu_put_ram_ptr(ptr);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}

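/*
 * Usage sketch (illustrative only): firmware loaders go through
 * cpu_physical_memory_write_rom() so that the copy also lands in regions the
 * guest sees as ROM.  The blob and load address below are hypothetical.
 *
 *     static const uint8_t boot_code[] = { 0xea, 0x00, 0x00, 0x00 };
 *
 *     cpu_physical_memory_write_rom(0xfffc0000, boot_code, sizeof(boot_code));
 */
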
typedef struct {
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}

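/*
 * Usage sketch (illustrative only): a caller whose address_space_map()
 * attempt failed because the single bounce buffer was busy can register a
 * map client; its callback runs from cpu_notify_map_clients() once the
 * buffer is released.  The callback, device state type and kick function
 * below are hypothetical.
 *
 *     static void retry_dma(void *opaque)
 *     {
 *         MyDeviceState *s = opaque;   // hypothetical device state
 *         my_device_kick(s);           // try the mapping again
 *     }
 *
 *     cpu_register_map_client(s, retry_dma);
 */
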
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    AddressSpaceDispatch *d = as->dispatch;
    hwaddr len = *plen;
    hwaddr todo = 0;
    int l;
    hwaddr page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(d, page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                address_space_read(as, addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}

void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

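/*
 * Usage sketch (illustrative only): the usual DMA pattern is map, access,
 * unmap, always checking whether the mapping was truncated or refused and
 * reporting the length actually used back to the unmap call.  The guest
 * address and length below are hypothetical.
 *
 *     hwaddr gpa = 0x2000, plen = 4096;
 *     void *host = cpu_physical_memory_map(gpa, &plen, 1);   // is_write = 1
 *
 *     if (host) {
 *         memset(host, 0, plen);                     // device fills the buffer
 *         cpu_physical_memory_unmap(host, plen, 1, plen);
 *     } else {
 *         // resources exhausted: register a map client and retry later
 *     }
 */
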
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

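/*
 * Usage sketch (illustrative only): device emulation and table walkers read
 * fixed-width values from guest physical memory with the ld*_phys() helpers;
 * the _le/_be variants fix the byte order, the plain variants use the
 * target's native order.  The addresses below are hypothetical.
 *
 *     uint32_t le_reg = ldl_le_phys(0x3000);   // 32-bit little-endian value
 *     uint32_t be_reg = ldl_be_phys(0x3004);   // 32-bit big-endian value
 *     uint64_t pte    = ldq_phys(0x3008);      // target-native 64-bit value
 */
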
/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
                               & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

void stq_phys_notdirty(hwaddr addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 4);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            section = &phys_sections[phys_section_rom];
        }
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(section->mr, addr, val, 2);
    } else {
        unsigned long addr1;
        addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, addr);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

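/*
 * Usage sketch (illustrative only): the st*_phys() helpers mirror the loads
 * above; unlike stl_phys_notdirty(), the plain variants go through
 * invalidate_and_set_dirty(), so stores into pages holding translated code
 * are handled correctly.  The addresses and values below are hypothetical.
 *
 *     stl_le_phys(0x4000, 0xdeadbeef);   // 32-bit little-endian store
 *     stw_be_phys(0x4004, 0x1234);       // 16-bit big-endian store
 *     stq_phys(0x4008, UINT64_C(0x1122334455667788));
 */
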
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif

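/*
 * Usage sketch (illustrative only): the gdb stub and the monitor use
 * cpu_memory_rw_debug() to peek at guest *virtual* addresses; each page is
 * translated with cpu_get_phys_page_debug() before the physical access.
 * The virtual address below is hypothetical.
 *
 *     uint32_t insn;
 *     if (cpu_memory_rw_debug(env, 0x80001000, (uint8_t *)&insn,
 *                             sizeof(insn), 0) < 0) {
 *         // address not mapped in the guest MMU
 *     }
 */
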
/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
            && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}

#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%zd\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
            cross_page,
            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch,
                             phys_addr >> TARGET_PAGE_BITS);

    return !(memory_region_is_ram(section->mr) ||
             memory_region_is_romd(section->mr));
}
#endif