Revision 5cd2c5b6

b/cpu-all.h
@@ -745,8 +745,11 @@
 #define PAGE_RESERVED  0x0020
 
 void page_dump(FILE *f);
-int walk_memory_regions(void *,
-    int (*fn)(void *, unsigned long, unsigned long, unsigned long));
+
+typedef int (*walk_memory_regions_fn)(void *, unsigned long,
+                                      unsigned long, unsigned long);
+int walk_memory_regions(void *, walk_memory_regions_fn);
+
 int page_get_flags(target_ulong address);
 void page_set_flags(target_ulong start, target_ulong end, int flags);
 int page_check_range(target_ulong start, target_ulong len, int flags);
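Annotation (not part of the patch): the new walk_memory_regions_fn typedef carries the same signature the old inline prototype spelled out. The callback receives the opaque pointer handed to walk_memory_regions, the start and end of a region, and its protection flags, and a non-zero return value stops the walk early. A hypothetical caller could now be written as below; the callback name and body are purely illustrative.

    /* Illustrative only: count the bytes of guest address space that carry
     * any protection flags.  Only the signature comes from cpu-all.h above. */
    static int count_region(void *priv, unsigned long start,
                            unsigned long end, unsigned long prot)
    {
        unsigned long *total = priv;

        if (prot != 0) {
            *total += end - start;
        }
        return 0;                       /* returning non-zero stops the walk */
    }

    static unsigned long count_mapped_bytes(void)
    {
        unsigned long total = 0;

        walk_memory_regions(&total, count_region);
        return total;
    }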
b/exec.c
@@ -141,30 +141,56 @@
     ram_addr_t region_offset;
 } PhysPageDesc;
 
-#define L2_BITS 10
-#if defined(CONFIG_USER_ONLY) && defined(TARGET_VIRT_ADDR_SPACE_BITS)
-/* XXX: this is a temporary hack for alpha target.
- *      In the future, this is to be replaced by a multi-level table
- *      to actually be able to handle the complete 64 bits address space.
- */
-#define L1_BITS (TARGET_VIRT_ADDR_SPACE_BITS - L2_BITS - TARGET_PAGE_BITS)
+/* In system mode we want L1_MAP to be based on physical addresses,
+   while in user mode we want it to be based on virtual addresses.  */
+#if !defined(CONFIG_USER_ONLY)
+# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 #else
-#define L1_BITS (32 - L2_BITS - TARGET_PAGE_BITS)
+# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
 #endif
 
-#define L1_SIZE (1 << L1_BITS)
+/* Size of the L2 (and L3, etc) page tables.  */
+#define L2_BITS 10
 #define L2_SIZE (1 << L2_BITS)
 
+/* The bits remaining after N lower levels of page tables.  */
+#define P_L1_BITS_REM \
+    ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+#define V_L1_BITS_REM \
+    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+
+/* Size of the L1 page table.  Avoid silly small sizes.  */
+#if P_L1_BITS_REM < 4
+#define P_L1_BITS  (P_L1_BITS_REM + L2_BITS)
+#else
+#define P_L1_BITS  P_L1_BITS_REM
+#endif
+
+#if V_L1_BITS_REM < 4
+#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
+#else
+#define V_L1_BITS  V_L1_BITS_REM
+#endif
+
+#define P_L1_SIZE  ((target_phys_addr_t)1 << P_L1_BITS)
+#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
+
+#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
+#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
+
 unsigned long qemu_real_host_page_size;
 unsigned long qemu_host_page_bits;
 unsigned long qemu_host_page_size;
 unsigned long qemu_host_page_mask;
 
-/* XXX: for system emulation, it could just be an array */
-static PageDesc *l1_map[L1_SIZE];
+/* This is a multi-level map on the virtual address space.
+   The bottom level has pointers to PageDesc.  */
+static void *l1_map[V_L1_SIZE];
 
 #if !defined(CONFIG_USER_ONLY)
-static PhysPageDesc **l1_phys_map;
+/* This is a multi-level map on the physical address space.
+   The bottom level has pointers to PhysPageDesc.  */
+static void *l1_phys_map[P_L1_SIZE];
 
 static void io_mem_init(void);
 
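Annotation (not part of the patch): a quick worked example of the macro arithmetic, under assumed values of TARGET_PAGE_BITS = 12 and a hypothetical 42-bit guest virtual address space. The remainder (42 - 12) % 10 is 0, so the "avoid silly small sizes" branch widens the L1 table to 1024 entries, and V_L1_SHIFT / L2_BITS works out to two further 10-bit levels below it (10 + 10 + 10 + 12 = 42 bits covered in total).

    /* Illustrative only: assumed address-space parameters, not from the patch. */
    #include <stdio.h>

    #define L1_MAP_ADDR_SPACE_BITS 42   /* hypothetical guest virtual address bits */
    #define TARGET_PAGE_BITS       12   /* 4 KiB pages */
    #define L2_BITS                10   /* 1024-entry lower-level tables */

    #define V_L1_BITS_REM ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
    #if V_L1_BITS_REM < 4
    #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
    #else
    #define V_L1_BITS V_L1_BITS_REM
    #endif
    #define V_L1_SIZE  (1UL << V_L1_BITS)
    #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)

    int main(void)
    {
        printf("V_L1_BITS  = %d (L1 table has %lu entries)\n", V_L1_BITS, V_L1_SIZE);
        printf("V_L1_SHIFT = %d -> %d lower levels of %d entries each\n",
               V_L1_SHIFT, V_L1_SHIFT / L2_BITS, 1 << L2_BITS);
        return 0;
    }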
@@ -239,133 +265,159 @@
     while ((1 << qemu_host_page_bits) < qemu_host_page_size)
         qemu_host_page_bits++;
     qemu_host_page_mask = ~(qemu_host_page_size - 1);
-#if !defined(CONFIG_USER_ONLY)
-    l1_phys_map = qemu_vmalloc(L1_SIZE * sizeof(void *));
-    memset(l1_phys_map, 0, L1_SIZE * sizeof(void *));
-#endif
 
 #if !defined(_WIN32) && defined(CONFIG_USER_ONLY)
     {
-        long long startaddr, endaddr;
         FILE *f;
-        int n;
 
-        mmap_lock();
         last_brk = (unsigned long)sbrk(0);
+
         f = fopen("/proc/self/maps", "r");
         if (f) {
+            mmap_lock();
+
             do {
-                n = fscanf (f, "%llx-%llx %*[^\n]\n", &startaddr, &endaddr);
-                if (n == 2) {
-                    startaddr = MIN(startaddr,
-                                    (1ULL << TARGET_PHYS_ADDR_SPACE_BITS) - 1);
-                    endaddr = MIN(endaddr,
-                                    (1ULL << TARGET_PHYS_ADDR_SPACE_BITS) - 1);
-                    page_set_flags(startaddr & TARGET_PAGE_MASK,
-                                   TARGET_PAGE_ALIGN(endaddr),
-                                   PAGE_RESERVED);
+                unsigned long startaddr, endaddr;
+                int n;
+
+                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
+
+                if (n == 2 && h2g_valid(startaddr)) {
+                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+                    if (h2g_valid(endaddr)) {
+                        endaddr = h2g(endaddr);
+                    } else {
+                        endaddr = ~0ul;
+                    }
+                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                 }
             } while (!feof(f));
+
             fclose(f);
+            mmap_unlock();
         }
-        mmap_unlock();
     }
 #endif
 }
 
-static inline PageDesc **page_l1_map(target_ulong index)
+static PageDesc *page_find_alloc(target_ulong index, int alloc)
 {
-#if TARGET_LONG_BITS > 32
-    /* Host memory outside guest VM.  For 32-bit targets we have already
-       excluded high addresses.  */
-    if (index > ((target_ulong)L2_SIZE * L1_SIZE))
-        return NULL;
+#if defined(CONFIG_USER_ONLY)
+    /* We can't use qemu_malloc because it may recurse into a locked mutex.
+       Neither can we record the new pages we reserve while allocating a
+       given page because that may recurse into an unallocated page table
+       entry.  Stuff the allocations we do make into a queue and process
+       them after having completed one entire page table allocation.  */
+
+    unsigned long reserve[2 * (V_L1_SHIFT / L2_BITS)];
+    int reserve_idx = 0;
+
+# define ALLOC(P, SIZE)                                 \
+    do {                                                \
+        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
+        if (h2g_valid(P)) {                             \
+            reserve[reserve_idx] = h2g(P);              \
+            reserve[reserve_idx + 1] = SIZE;            \
+            reserve_idx += 2;                           \
+        }                                               \
+    } while (0)
+#else
+# define ALLOC(P, SIZE) \
+    do { P = qemu_mallocz(SIZE); } while (0)
 #endif
-    return &l1_map[index >> L2_BITS];
-}
 
-static inline PageDesc *page_find_alloc(target_ulong index)
-{
-    PageDesc **lp, *p;
-    lp = page_l1_map(index);
-    if (!lp)
-        return NULL;
+    PageDesc *pd;
+    void **lp;
+    int i;
 
-    p = *lp;
-    if (!p) {
-        /* allocate if not found */
-#if defined(CONFIG_USER_ONLY)
-        size_t len = sizeof(PageDesc) * L2_SIZE;
-        /* Don't use qemu_malloc because it may recurse.  */
-        p = mmap(NULL, len, PROT_READ | PROT_WRITE,
-                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-        *lp = p;
-        if (h2g_valid(p)) {
-            unsigned long addr = h2g(p);
-            page_set_flags(addr & TARGET_PAGE_MASK,
-                           TARGET_PAGE_ALIGN(addr + len),
-                           PAGE_RESERVED);
+    /* Level 1.  Always allocated.  */
+    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
+
+    /* Level 2..N-1.  */
+    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+        void **p = *lp;
+
+        if (p == NULL) {
+            if (!alloc) {
+                return NULL;
+            }
+            ALLOC(p, sizeof(void *) * L2_SIZE);
+            *lp = p;
         }
-#else
-        p = qemu_mallocz(sizeof(PageDesc) * L2_SIZE);
-        *lp = p;
-#endif
+
+        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
     }
-    return p + (index & (L2_SIZE - 1));
+
+    pd = *lp;
+    if (pd == NULL) {
+        if (!alloc) {
+            return NULL;
+        }
+        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
+        *lp = pd;
+    }
+
+#undef ALLOC
+#if defined(CONFIG_USER_ONLY)
+    for (i = 0; i < reserve_idx; i += 2) {
+        unsigned long addr = reserve[i];
+        unsigned long len = reserve[i + 1];
+
+        page_set_flags(addr & TARGET_PAGE_MASK,
+                       TARGET_PAGE_ALIGN(addr + len),
+                       PAGE_RESERVED);
+    }
+#endif
+
+    return pd + (index & (L2_SIZE - 1));
 }
 
 static inline PageDesc *page_find(target_ulong index)
 {
-    PageDesc **lp, *p;
-    lp = page_l1_map(index);
-    if (!lp)
-        return NULL;
-
-    p = *lp;
-    if (!p) {
-        return NULL;
-    }
-    return p + (index & (L2_SIZE - 1));
+    return page_find_alloc(index, 0);
 }
 
 #if !defined(CONFIG_USER_ONLY)
 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
 {
-    void **lp, **p;
     PhysPageDesc *pd;
+    void **lp;
+    int i;
 
-    p = (void **)l1_phys_map;
-#if TARGET_PHYS_ADDR_SPACE_BITS > 32
+    /* Level 1.  Always allocated.  */
+    lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
 
-#if TARGET_PHYS_ADDR_SPACE_BITS > (32 + L1_BITS)
-#error unsupported TARGET_PHYS_ADDR_SPACE_BITS
-#endif
-    lp = p + ((index >> (L1_BITS + L2_BITS)) & (L1_SIZE - 1));
-    p = *lp;
-    if (!p) {
-        /* allocate if not found */
-        if (!alloc)
-            return NULL;
-        p = qemu_vmalloc(sizeof(void *) * L1_SIZE);
-        memset(p, 0, sizeof(void *) * L1_SIZE);
-        *lp = p;
+    /* Level 2..N-1.  */
+    for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+        void **p = *lp;
+        if (p == NULL) {
+            if (!alloc) {
+                return NULL;
+            }
+            *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
+        }
+        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
     }
-#endif
-    lp = p + ((index >> L2_BITS) & (L1_SIZE - 1));
+
     pd = *lp;
-    if (!pd) {
+    if (pd == NULL) {
         int i;
-        /* allocate if not found */
-        if (!alloc)
+
+        if (!alloc) {
             return NULL;
-        pd = qemu_vmalloc(sizeof(PhysPageDesc) * L2_SIZE);
-        *lp = pd;
+        }
+
+        *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
+
         for (i = 0; i < L2_SIZE; i++) {
-          pd[i].phys_offset = IO_MEM_UNASSIGNED;
-          pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
+            pd[i].phys_offset = IO_MEM_UNASSIGNED;
+            pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
         }
     }
-    return ((PhysPageDesc *)pd) + (index & (L2_SIZE - 1));
+
+    return pd + (index & (L2_SIZE - 1));
 }
 
 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
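Annotation (not part of the patch): the allocation-free path through the new page_find_alloc (and its physical-address twin) is a loop that peels off L2_BITS of the page index per level. The standalone sketch below mirrors that index arithmetic using the example constants from the previous note; the leaf type is a placeholder, not the real PageDesc.

    /* Illustrative only: a multi-level lookup shaped like page_find_alloc(index, 0),
     * with assumed constants (a 1024-entry L1 over two 1024-entry lower levels). */
    #include <stddef.h>

    #define L2_BITS    10
    #define L2_SIZE    (1 << L2_BITS)
    #define V_L1_BITS  10
    #define V_L1_SIZE  (1 << V_L1_BITS)
    #define V_L1_SHIFT 20                  /* bits consumed below the L1 index */

    typedef struct { int flags; } leaf_t;  /* stand-in for PageDesc */
    static void *l1_map[V_L1_SIZE];

    static leaf_t *lookup(unsigned long index)
    {
        /* Level 1: always present, indexed by the top V_L1_BITS of the page index. */
        void **lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
        int i;

        /* Levels 2..N-1: each consumes another L2_BITS of the index. */
        for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
            void **p = *lp;
            if (p == NULL) {
                return NULL;               /* the !alloc case: nothing mapped here */
            }
            lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
        }

        /* Bottom level: an array of leaves rather than an array of pointers. */
        leaf_t *pd = *lp;
        return pd ? pd + (index & (L2_SIZE - 1)) : NULL;
    }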
@@ -573,24 +625,37 @@
     p->code_write_count = 0;
 }
 
-/* set to NULL all the 'first_tb' fields in all PageDescs */
-static void page_flush_tb(void)
+/* Set to NULL all the 'first_tb' fields in all PageDescs. */
+
+static void page_flush_tb_1 (int level, void **lp)
 {
-    int i, j;
-    PageDesc *p;
+    int i;
 
-    for(i = 0; i < L1_SIZE; i++) {
-        p = l1_map[i];
-        if (p) {
-            for(j = 0; j < L2_SIZE; j++) {
-                p->first_tb = NULL;
-                invalidate_page_bitmap(p);
-                p++;
-            }
+    if (*lp == NULL) {
+        return;
+    }
+    if (level == 0) {
+        PageDesc *pd = *lp;
+        for (i = 0; i < L2_BITS; ++i) {
+            pd[i].first_tb = NULL;
+            invalidate_page_bitmap(pd + i);
+        }
+    } else {
+        void **pp = *lp;
+        for (i = 0; i < L2_BITS; ++i) {
+            page_flush_tb_1 (level - 1, pp + i);
         }
     }
 }
 
+static void page_flush_tb(void)
+{
+    int i;
+    for (i = 0; i < V_L1_SIZE; i++) {
+        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+    }
+}
+
 /* flush all the translation blocks */
 /* XXX: tb_flush is currently not thread safe */
 void tb_flush(CPUState *env1)
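Annotation (not part of the patch): page_flush_tb_1 above, and phys_page_for_each_1 and walk_memory_regions_1 further down, all share the same shape: recurse through the pointer levels, act on the leaf array at level 0, and skip absent subtrees entirely. A generic sketch of that traversal, with placeholder types and an assumed table width, looks like this.

    /* Illustrative only: generic level-driven traversal over a multi-level table.
     * The leaf type, table width and callback are assumptions, not QEMU code. */
    #define ENTRIES_PER_LEVEL 1024

    typedef struct { void *first_tb; } page_leaf_t;   /* stand-in for PageDesc */

    static void visit_level(int level, void **lp, void (*visit)(page_leaf_t *leaf))
    {
        int i;

        if (*lp == NULL) {
            return;                        /* absent subtree: nothing to visit */
        }
        if (level == 0) {
            page_leaf_t *pd = *lp;         /* bottom level: an array of leaves */
            for (i = 0; i < ENTRIES_PER_LEVEL; ++i) {
                visit(pd + i);
            }
        } else {
            void **pp = *lp;               /* intermediate level: array of pointers */
            for (i = 0; i < ENTRIES_PER_LEVEL; ++i) {
                visit_level(level - 1, pp + i, visit);
            }
        }
    }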
@@ -1081,7 +1146,7 @@
     TranslationBlock *last_first_tb;
 
     tb->page_addr[n] = page_addr;
-    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS);
+    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
     tb->page_next[n] = p->first_tb;
     last_first_tb = p->first_tb;
     p->first_tb = (TranslationBlock *)((long)tb | n);
@@ -1641,50 +1706,37 @@
     return 0;
 }
 
-static void phys_page_for_each_in_l1_map(PhysPageDesc **phys_map,
-                                         CPUPhysMemoryClient *client)
+static void phys_page_for_each_1(CPUPhysMemoryClient *client,
+                                 int level, void **lp)
 {
-    PhysPageDesc *pd;
-    int l1, l2;
+    int i;
 
-    for (l1 = 0; l1 < L1_SIZE; ++l1) {
-        pd = phys_map[l1];
-        if (!pd) {
-            continue;
-        }
-        for (l2 = 0; l2 < L2_SIZE; ++l2) {
-            if (pd[l2].phys_offset == IO_MEM_UNASSIGNED) {
-                continue;
+    if (*lp == NULL) {
+        return;
+    }
+    if (level == 0) {
+        PhysPageDesc *pd = *lp;
+        for (i = 0; i < L2_BITS; ++i) {
+            if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
+                client->set_memory(client, pd[i].region_offset,
+                                   TARGET_PAGE_SIZE, pd[i].phys_offset);
             }
-            client->set_memory(client, pd[l2].region_offset,
-                               TARGET_PAGE_SIZE, pd[l2].phys_offset);
+        }
+    } else {
+        void **pp = *lp;
+        for (i = 0; i < L2_BITS; ++i) {
+            phys_page_for_each_1(client, level - 1, pp + i);
         }
     }
 }
 
 static void phys_page_for_each(CPUPhysMemoryClient *client)
 {
-#if TARGET_PHYS_ADDR_SPACE_BITS > 32
-
-#if TARGET_PHYS_ADDR_SPACE_BITS > (32 + L1_BITS)
-#error unsupported TARGET_PHYS_ADDR_SPACE_BITS
-#endif
-    void **phys_map = (void **)l1_phys_map;
-    int l1;
-    if (!l1_phys_map) {
-        return;
-    }
-    for (l1 = 0; l1 < L1_SIZE; ++l1) {
-        if (phys_map[l1]) {
-            phys_page_for_each_in_l1_map(phys_map[l1], client);
-        }
-    }
-#else
-    if (!l1_phys_map) {
-        return;
+    int i;
+    for (i = 0; i < P_L1_SIZE; ++i) {
+        phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
+                             l1_phys_map + 1);
     }
-    phys_page_for_each_in_l1_map(l1_phys_map, client);
-#endif
 }
 
 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
@@ -2148,44 +2200,87 @@
  * Walks guest process memory "regions" one by one
  * and calls callback function 'fn' for each region.
  */
-int walk_memory_regions(void *priv,
-    int (*fn)(void *, unsigned long, unsigned long, unsigned long))
+
+struct walk_memory_regions_data
 {
-    unsigned long start, end;
-    PageDesc *p = NULL;
-    int i, j, prot, prot1;
-    int rc = 0;
+    walk_memory_regions_fn fn;
+    void *priv;
+    unsigned long start;
+    int prot;
+};
 
-    start = end = -1;
-    prot = 0;
+static int walk_memory_regions_end(struct walk_memory_regions_data *data,
+                                   unsigned long end, int new_prot)
+{
+    if (data->start != -1ul) {
+        int rc = data->fn(data->priv, data->start, end, data->prot);
+        if (rc != 0) {
+            return rc;
+        }
+    }
+
+    data->start = (new_prot ? end : -1ul);
+    data->prot = new_prot;
+
+    return 0;
+}
+
+static int walk_memory_regions_1(struct walk_memory_regions_data *data,
+                                 unsigned long base, int level, void **lp)
+{
+    unsigned long pa;
+    int i, rc;
 
-    for (i = 0; i <= L1_SIZE; i++) {
-        p = (i < L1_SIZE) ? l1_map[i] : NULL;
-        for (j = 0; j < L2_SIZE; j++) {
-            prot1 = (p == NULL) ? 0 : p[j].flags;
-            /*
-             * "region" is one continuous chunk of memory
-             * that has same protection flags set.
-             */
-            if (prot1 != prot) {
-                end = (i << (32 - L1_BITS)) | (j << TARGET_PAGE_BITS);
-                if (start != -1) {
-                    rc = (*fn)(priv, start, end, prot);
-                    /* callback can stop iteration by returning != 0 */
-                    if (rc != 0)
-                        return (rc);
+    if (*lp == NULL) {
+        return walk_memory_regions_end(data, base, 0);
+    }
+
+    if (level == 0) {
+        PageDesc *pd = *lp;
+        for (i = 0; i < L2_BITS; ++i) {
+            int prot = pd[i].flags;
+
+            pa = base | (i << TARGET_PAGE_BITS);
+            if (prot != data->prot) {
+                rc = walk_memory_regions_end(data, pa, prot);
+                if (rc != 0) {
+                    return rc;
                 }
-                if (prot1 != 0)
-                    start = end;
-                else
-                    start = -1;
-                prot = prot1;
             }
-            if (p == NULL)
-                break;
+        }
+    } else {
+        void **pp = *lp;
+        for (i = 0; i < L2_BITS; ++i) {
+            pa = base | (i << (TARGET_PAGE_BITS + L2_BITS * level));
+            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
+            if (rc != 0) {
+                return rc;
+            }
+        }
+    }
+
+    return 0;
+}
+
+int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
+{
+    struct walk_memory_regions_data data;
+    unsigned long i;
+
+    data.fn = fn;
+    data.priv = priv;
+    data.start = -1ul;
+    data.prot = 0;
+
+    for (i = 0; i < V_L1_SIZE; i++) {
+        int rc = walk_memory_regions_1(&data, i << V_L1_SHIFT,
                                        V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+        if (rc != 0) {
+            return rc;
         }
     }
-    return (rc);
+
+    return walk_memory_regions_end(&data, 0, 0);
 }
 
 static int dump_region(void *priv, unsigned long start,
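Annotation (not part of the patch): the walk_memory_regions_data/walk_memory_regions_end pair replaces the old start/end/prot local-variable juggling. A region is open while data->start != -1ul, and it is flushed to the callback whenever the protection value changes. The toy program below, with made-up page flags, shows the same coalescing logic on a flat array of per-page flags: consecutive pages with identical flags are reported as one region.

    /* Illustrative only: coalesce runs of identical per-page flags into regions.
     * Page count and flag values are invented for the example. */
    #include <stdio.h>

    #define PAGE_BITS 12
    #define NPAGES    8

    static int flags[NPAGES] = { 0, 3, 3, 3, 0, 5, 5, 0 };

    int main(void)
    {
        unsigned long start = -1ul;   /* -1ul means "no open region" */
        int prot = 0;
        int i;

        /* One extra iteration with prot 0 flushes a region that runs to the end. */
        for (i = 0; i <= NPAGES; i++) {
            unsigned long pa = (unsigned long)i << PAGE_BITS;
            int new_prot = (i < NPAGES) ? flags[i] : 0;

            if (new_prot != prot) {
                if (start != -1ul) {
                    printf("region 0x%lx-0x%lx prot %d\n", start, pa, prot);
                }
                start = new_prot ? pa : -1ul;
                prot = new_prot;
            }
        }
        return 0;
    }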
