Revision f7d0fe02 block/qcow2.c

b/block/qcow2.c
26 26
#include "module.h"
27 27
#include <zlib.h>
28 28
#include "aes.h"
29
#include "block/qcow2.h"
29 30

  
30 31
/*
31 32
  Differences with QCOW:
......
47 48
//#define DEBUG_ALLOC2
48 49
//#define DEBUG_EXT
49 50

  
50
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
51
#define QCOW_VERSION 2
52

  
53
#define QCOW_CRYPT_NONE 0
54
#define QCOW_CRYPT_AES  1
55

  
56
#define QCOW_MAX_CRYPT_CLUSTERS 32
57

  
58
/* indicate that the refcount of the referenced cluster is exactly one. */
59
#define QCOW_OFLAG_COPIED     (1LL << 63)
60
/* indicate that the cluster is compressed (they never have the copied flag) */
61
#define QCOW_OFLAG_COMPRESSED (1LL << 62)
62

  
63
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
64

  
65
#define MIN_CLUSTER_BITS 9
66
#define MAX_CLUSTER_BITS 16
67

  
68
typedef struct QCowHeader {
69
    uint32_t magic;
70
    uint32_t version;
71
    uint64_t backing_file_offset;
72
    uint32_t backing_file_size;
73
    uint32_t cluster_bits;
74
    uint64_t size; /* in bytes */
75
    uint32_t crypt_method;
76
    uint32_t l1_size; /* XXX: save number of clusters instead ? */
77
    uint64_t l1_table_offset;
78
    uint64_t refcount_table_offset;
79
    uint32_t refcount_table_clusters;
80
    uint32_t nb_snapshots;
81
    uint64_t snapshots_offset;
82
} QCowHeader;
83

  
84 51

  
85 52
typedef struct {
86 53
    uint32_t magic;
......
110 77
    /* name follows  */
111 78
} QCowSnapshotHeader;
112 79

  
113
#define L2_CACHE_SIZE 16
114

  
115
typedef struct QCowSnapshot {
116
    uint64_t l1_table_offset;
117
    uint32_t l1_size;
118
    char *id_str;
119
    char *name;
120
    uint32_t vm_state_size;
121
    uint32_t date_sec;
122
    uint32_t date_nsec;
123
    uint64_t vm_clock_nsec;
124
} QCowSnapshot;
125

  
126
typedef struct BDRVQcowState {
127
    BlockDriverState *hd;
128
    int cluster_bits;
129
    int cluster_size;
130
    int cluster_sectors;
131
    int l2_bits;
132
    int l2_size;
133
    int l1_size;
134
    int l1_vm_state_index;
135
    int csize_shift;
136
    int csize_mask;
137
    uint64_t cluster_offset_mask;
138
    uint64_t l1_table_offset;
139
    uint64_t *l1_table;
140
    uint64_t *l2_cache;
141
    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
142
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
143
    uint8_t *cluster_cache;
144
    uint8_t *cluster_data;
145
    uint64_t cluster_cache_offset;
146

  
147
    uint64_t *refcount_table;
148
    uint64_t refcount_table_offset;
149
    uint32_t refcount_table_size;
150
    uint64_t refcount_block_cache_offset;
151
    uint16_t *refcount_block_cache;
152
    int64_t free_cluster_index;
153
    int64_t free_byte_offset;
154

  
155
    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
156
    uint32_t crypt_method_header;
157
    AES_KEY aes_encrypt_key;
158
    AES_KEY aes_decrypt_key;
159
    uint64_t snapshots_offset;
160
    int snapshots_size;
161
    int nb_snapshots;
162
    QCowSnapshot *snapshots;
163
} BDRVQcowState;
164 80

  
165 81
static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
166 82
static int qcow_read(BlockDriverState *bs, int64_t sector_num,
167 83
                     uint8_t *buf, int nb_sectors);
168 84
static int qcow_read_snapshots(BlockDriverState *bs);
169 85
static void qcow_free_snapshots(BlockDriverState *bs);
170
static int refcount_init(BlockDriverState *bs);
171
static void refcount_close(BlockDriverState *bs);
172
static int get_refcount(BlockDriverState *bs, int64_t cluster_index);
173
static int update_cluster_refcount(BlockDriverState *bs,
174
                                   int64_t cluster_index,
175
                                   int addend);
176
static int update_refcount(BlockDriverState *bs,
177
                            int64_t offset, int64_t length,
178
                            int addend);
179
static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
180
static int64_t alloc_bytes(BlockDriverState *bs, int size);
181
static void free_clusters(BlockDriverState *bs,
182
                          int64_t offset, int64_t size);
183
static int check_refcounts(BlockDriverState *bs);
184 86

  
185 87
static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
186 88
{
......
482 384
    return 0;
483 385
}
484 386

  
485
static void l2_cache_reset(BlockDriverState *bs)
387
void l2_cache_reset(BlockDriverState *bs)
486 388
{
487 389
    BDRVQcowState *s = bs->opaque;
488 390

  
......
691 593
    return l2_table;
692 594
}
693 595

  
694
static int size_to_clusters(BDRVQcowState *s, int64_t size)
695
{
696
    return (size + (s->cluster_size - 1)) >> s->cluster_bits;
697
}
698

  
699 596
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
700 597
        uint64_t *l2_table, uint64_t start, uint64_t mask)
701 598
{
......
1522 1419
    bdrv_delete(s->hd);
1523 1420
}
1524 1421

  
1525
/* XXX: use std qcow open function ? */
1526
typedef struct QCowCreateState {
1527
    int cluster_size;
1528
    int cluster_bits;
1529
    uint16_t *refcount_block;
1530
    uint64_t *refcount_table;
1531
    int64_t l1_table_offset;
1532
    int64_t refcount_table_offset;
1533
    int64_t refcount_block_offset;
1534
} QCowCreateState;
1535

  
1536
static void create_refcount_update(QCowCreateState *s,
1537
                                   int64_t offset, int64_t size)
1538
{
1539
    int refcount;
1540
    int64_t start, last, cluster_offset;
1541
    uint16_t *p;
1542

  
1543
    start = offset & ~(s->cluster_size - 1);
1544
    last = (offset + size - 1)  & ~(s->cluster_size - 1);
1545
    for(cluster_offset = start; cluster_offset <= last;
1546
        cluster_offset += s->cluster_size) {
1547
        p = &s->refcount_block[cluster_offset >> s->cluster_bits];
1548
        refcount = be16_to_cpu(*p);
1549
        refcount++;
1550
        *p = cpu_to_be16(refcount);
1551
    }
1552
}
1553

  
1554 1422
static int get_bits_from_size(size_t size)
1555 1423
{
1556 1424
    int res = 0;
......
1834 1702
/*********************************************************/
1835 1703
/* snapshot support */
1836 1704

  
1837
/* update the refcounts of snapshots and the copied flag */
1838
static int update_snapshot_refcount(BlockDriverState *bs,
1839
                                    int64_t l1_table_offset,
1840
                                    int l1_size,
1841
                                    int addend)
1842
{
1843
    BDRVQcowState *s = bs->opaque;
1844
    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
1845
    int64_t old_offset, old_l2_offset;
1846
    int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
1847

  
1848
    l2_cache_reset(bs);
1849

  
1850
    l2_table = NULL;
1851
    l1_table = NULL;
1852
    l1_size2 = l1_size * sizeof(uint64_t);
1853
    l1_allocated = 0;
1854
    if (l1_table_offset != s->l1_table_offset) {
1855
        l1_table = qemu_malloc(l1_size2);
1856
        l1_allocated = 1;
1857
        if (bdrv_pread(s->hd, l1_table_offset,
1858
                       l1_table, l1_size2) != l1_size2)
1859
            goto fail;
1860
        for(i = 0;i < l1_size; i++)
1861
            be64_to_cpus(&l1_table[i]);
1862
    } else {
1863
        assert(l1_size == s->l1_size);
1864
        l1_table = s->l1_table;
1865
        l1_allocated = 0;
1866
    }
1867

  
1868
    l2_size = s->l2_size * sizeof(uint64_t);
1869
    l2_table = qemu_malloc(l2_size);
1870
    l1_modified = 0;
1871
    for(i = 0; i < l1_size; i++) {
1872
        l2_offset = l1_table[i];
1873
        if (l2_offset) {
1874
            old_l2_offset = l2_offset;
1875
            l2_offset &= ~QCOW_OFLAG_COPIED;
1876
            l2_modified = 0;
1877
            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
1878
                goto fail;
1879
            for(j = 0; j < s->l2_size; j++) {
1880
                offset = be64_to_cpu(l2_table[j]);
1881
                if (offset != 0) {
1882
                    old_offset = offset;
1883
                    offset &= ~QCOW_OFLAG_COPIED;
1884
                    if (offset & QCOW_OFLAG_COMPRESSED) {
1885
                        nb_csectors = ((offset >> s->csize_shift) &
1886
                                       s->csize_mask) + 1;
1887
                        if (addend != 0)
1888
                            update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
1889
                                            nb_csectors * 512, addend);
1890
                        /* compressed clusters are never modified */
1891
                        refcount = 2;
1892
                    } else {
1893
                        if (addend != 0) {
1894
                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
1895
                        } else {
1896
                            refcount = get_refcount(bs, offset >> s->cluster_bits);
1897
                        }
1898
                    }
1899

  
1900
                    if (refcount == 1) {
1901
                        offset |= QCOW_OFLAG_COPIED;
1902
                    }
1903
                    if (offset != old_offset) {
1904
                        l2_table[j] = cpu_to_be64(offset);
1905
                        l2_modified = 1;
1906
                    }
1907
                }
1908
            }
1909
            if (l2_modified) {
1910
                if (bdrv_pwrite(s->hd,
1911
                                l2_offset, l2_table, l2_size) != l2_size)
1912
                    goto fail;
1913
            }
1914

  
1915
            if (addend != 0) {
1916
                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
1917
            } else {
1918
                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
1919
            }
1920
            if (refcount == 1) {
1921
                l2_offset |= QCOW_OFLAG_COPIED;
1922
            }
1923
            if (l2_offset != old_l2_offset) {
1924
                l1_table[i] = l2_offset;
1925
                l1_modified = 1;
1926
            }
1927
        }
1928
    }
1929
    if (l1_modified) {
1930
        for(i = 0; i < l1_size; i++)
1931
            cpu_to_be64s(&l1_table[i]);
1932
        if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
1933
                        l1_size2) != l1_size2)
1934
            goto fail;
1935
        for(i = 0; i < l1_size; i++)
1936
            be64_to_cpus(&l1_table[i]);
1937
    }
1938
    if (l1_allocated)
1939
        qemu_free(l1_table);
1940
    qemu_free(l2_table);
1941
    return 0;
1942
 fail:
1943
    if (l1_allocated)
1944
        qemu_free(l1_table);
1945
    qemu_free(l2_table);
1946
    return -EIO;
1947
}
1948 1705

  
1949 1706
static void qcow_free_snapshots(BlockDriverState *bs)
1950 1707
{
......
2306 2063
    return s->nb_snapshots;
2307 2064
}
2308 2065

  
2309
/*********************************************************/
2310
/* refcount handling */
2311

  
2312
static int refcount_init(BlockDriverState *bs)
2313
{
2314
    BDRVQcowState *s = bs->opaque;
2315
    int ret, refcount_table_size2, i;
2316

  
2317
    s->refcount_block_cache = qemu_malloc(s->cluster_size);
2318
    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
2319
    s->refcount_table = qemu_malloc(refcount_table_size2);
2320
    if (s->refcount_table_size > 0) {
2321
        ret = bdrv_pread(s->hd, s->refcount_table_offset,
2322
                         s->refcount_table, refcount_table_size2);
2323
        if (ret != refcount_table_size2)
2324
            goto fail;
2325
        for(i = 0; i < s->refcount_table_size; i++)
2326
            be64_to_cpus(&s->refcount_table[i]);
2327
    }
2328
    return 0;
2329
 fail:
2330
    return -ENOMEM;
2331
}
2332

  
2333
static void refcount_close(BlockDriverState *bs)
2334
{
2335
    BDRVQcowState *s = bs->opaque;
2336
    qemu_free(s->refcount_block_cache);
2337
    qemu_free(s->refcount_table);
2338
}
2339

  
2340

  
2341
static int load_refcount_block(BlockDriverState *bs,
2342
                               int64_t refcount_block_offset)
2343
{
2344
    BDRVQcowState *s = bs->opaque;
2345
    int ret;
2346
    ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
2347
                     s->cluster_size);
2348
    if (ret != s->cluster_size)
2349
        return -EIO;
2350
    s->refcount_block_cache_offset = refcount_block_offset;
2351
    return 0;
2352
}
2353

  
2354
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
2355
{
2356
    BDRVQcowState *s = bs->opaque;
2357
    int refcount_table_index, block_index;
2358
    int64_t refcount_block_offset;
2359

  
2360
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
2361
    if (refcount_table_index >= s->refcount_table_size)
2362
        return 0;
2363
    refcount_block_offset = s->refcount_table[refcount_table_index];
2364
    if (!refcount_block_offset)
2365
        return 0;
2366
    if (refcount_block_offset != s->refcount_block_cache_offset) {
2367
        /* better than nothing: return allocated if read error */
2368
        if (load_refcount_block(bs, refcount_block_offset) < 0)
2369
            return 1;
2370
    }
2371
    block_index = cluster_index &
2372
        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
2373
    return be16_to_cpu(s->refcount_block_cache[block_index]);
2374
}
2375

  
2376
/* return < 0 if error */
2377
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
2378
{
2379
    BDRVQcowState *s = bs->opaque;
2380
    int i, nb_clusters;
2381

  
2382
    nb_clusters = size_to_clusters(s, size);
2383
retry:
2384
    for(i = 0; i < nb_clusters; i++) {
2385
        int64_t i = s->free_cluster_index++;
2386
        if (get_refcount(bs, i) != 0)
2387
            goto retry;
2388
    }
2389
#ifdef DEBUG_ALLOC2
2390
    printf("alloc_clusters: size=%lld -> %lld\n",
2391
            size,
2392
            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
2393
#endif
2394
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
2395
}
2396

  
2397
static int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
2398
{
2399
    int64_t offset;
2400

  
2401
    offset = alloc_clusters_noref(bs, size);
2402
    update_refcount(bs, offset, size, 1);
2403
    return offset;
2404
}
2405

  
2406
/* only used to allocate compressed sectors. We try to allocate
2407
   contiguous sectors. size must be <= cluster_size */
2408
static int64_t alloc_bytes(BlockDriverState *bs, int size)
2409
{
2410
    BDRVQcowState *s = bs->opaque;
2411
    int64_t offset, cluster_offset;
2412
    int free_in_cluster;
2413

  
2414
    assert(size > 0 && size <= s->cluster_size);
2415
    if (s->free_byte_offset == 0) {
2416
        s->free_byte_offset = alloc_clusters(bs, s->cluster_size);
2417
    }
2418
 redo:
2419
    free_in_cluster = s->cluster_size -
2420
        (s->free_byte_offset & (s->cluster_size - 1));
2421
    if (size <= free_in_cluster) {
2422
        /* enough space in current cluster */
2423
        offset = s->free_byte_offset;
2424
        s->free_byte_offset += size;
2425
        free_in_cluster -= size;
2426
        if (free_in_cluster == 0)
2427
            s->free_byte_offset = 0;
2428
        if ((offset & (s->cluster_size - 1)) != 0)
2429
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
2430
    } else {
2431
        offset = alloc_clusters(bs, s->cluster_size);
2432
        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
2433
        if ((cluster_offset + s->cluster_size) == offset) {
2434
            /* we are lucky: contiguous data */
2435
            offset = s->free_byte_offset;
2436
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
2437
            s->free_byte_offset += size;
2438
        } else {
2439
            s->free_byte_offset = offset;
2440
            goto redo;
2441
        }
2442
    }
2443
    return offset;
2444
}
2445

  
2446
static void free_clusters(BlockDriverState *bs,
2447
                          int64_t offset, int64_t size)
2448
{
2449
    update_refcount(bs, offset, size, -1);
2450
}
2451

  
2452
static int grow_refcount_table(BlockDriverState *bs, int min_size)
2453
{
2454
    BDRVQcowState *s = bs->opaque;
2455
    int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
2456
    uint64_t *new_table;
2457
    int64_t table_offset;
2458
    uint8_t data[12];
2459
    int old_table_size;
2460
    int64_t old_table_offset;
2461

  
2462
    if (min_size <= s->refcount_table_size)
2463
        return 0;
2464
    /* compute new table size */
2465
    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2466
    for(;;) {
2467
        if (refcount_table_clusters == 0) {
2468
            refcount_table_clusters = 1;
2469
        } else {
2470
            refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
2471
        }
2472
        new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
2473
        if (min_size <= new_table_size)
2474
            break;
2475
    }
2476
#ifdef DEBUG_ALLOC2
2477
    printf("grow_refcount_table from %d to %d\n",
2478
           s->refcount_table_size,
2479
           new_table_size);
2480
#endif
2481
    new_table_size2 = new_table_size * sizeof(uint64_t);
2482
    new_table = qemu_mallocz(new_table_size2);
2483
    memcpy(new_table, s->refcount_table,
2484
           s->refcount_table_size * sizeof(uint64_t));
2485
    for(i = 0; i < s->refcount_table_size; i++)
2486
        cpu_to_be64s(&new_table[i]);
2487
    /* Note: we cannot update the refcount now to avoid recursion */
2488
    table_offset = alloc_clusters_noref(bs, new_table_size2);
2489
    ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
2490
    if (ret != new_table_size2)
2491
        goto fail;
2492
    for(i = 0; i < s->refcount_table_size; i++)
2493
        be64_to_cpus(&new_table[i]);
2494

  
2495
    cpu_to_be64w((uint64_t*)data, table_offset);
2496
    cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
2497
    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
2498
                    data, sizeof(data)) != sizeof(data))
2499
        goto fail;
2500
    qemu_free(s->refcount_table);
2501
    old_table_offset = s->refcount_table_offset;
2502
    old_table_size = s->refcount_table_size;
2503
    s->refcount_table = new_table;
2504
    s->refcount_table_size = new_table_size;
2505
    s->refcount_table_offset = table_offset;
2506

  
2507
    update_refcount(bs, table_offset, new_table_size2, 1);
2508
    free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
2509
    return 0;
2510
 fail:
2511
    free_clusters(bs, table_offset, new_table_size2);
2512
    qemu_free(new_table);
2513
    return -EIO;
2514
}
2515

  
2516

  
2517
static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
2518
{
2519
    BDRVQcowState *s = bs->opaque;
2520
    int64_t offset, refcount_block_offset;
2521
    int ret, refcount_table_index;
2522
    uint64_t data64;
2523

  
2524
    /* Find L1 index and grow refcount table if needed */
2525
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
2526
    if (refcount_table_index >= s->refcount_table_size) {
2527
        ret = grow_refcount_table(bs, refcount_table_index + 1);
2528
        if (ret < 0)
2529
            return ret;
2530
    }
2531

  
2532
    /* Load or allocate the refcount block */
2533
    refcount_block_offset = s->refcount_table[refcount_table_index];
2534
    if (!refcount_block_offset) {
2535
        /* create a new refcount block */
2536
        /* Note: we cannot update the refcount now to avoid recursion */
2537
        offset = alloc_clusters_noref(bs, s->cluster_size);
2538
        memset(s->refcount_block_cache, 0, s->cluster_size);
2539
        ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
2540
        if (ret != s->cluster_size)
2541
            return -EINVAL;
2542
        s->refcount_table[refcount_table_index] = offset;
2543
        data64 = cpu_to_be64(offset);
2544
        ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
2545
                          refcount_table_index * sizeof(uint64_t),
2546
                          &data64, sizeof(data64));
2547
        if (ret != sizeof(data64))
2548
            return -EINVAL;
2549

  
2550
        refcount_block_offset = offset;
2551
        s->refcount_block_cache_offset = offset;
2552
        update_refcount(bs, offset, s->cluster_size, 1);
2553
    } else {
2554
        if (refcount_block_offset != s->refcount_block_cache_offset) {
2555
            if (load_refcount_block(bs, refcount_block_offset) < 0)
2556
                return -EIO;
2557
        }
2558
    }
2559

  
2560
    return refcount_block_offset;
2561
}
2562

  
2563
/* addend must be 1 or -1 */
2564
static int update_cluster_refcount(BlockDriverState *bs,
2565
                                   int64_t cluster_index,
2566
                                   int addend)
2567
{
2568
    BDRVQcowState *s = bs->opaque;
2569
    int ret;
2570

  
2571
    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
2572
    if (ret < 0) {
2573
        return ret;
2574
    }
2575

  
2576
    return get_refcount(bs, cluster_index);
2577
}
2578

  
2579
/* XXX: cache several refcount block clusters ? */
2580
static int update_refcount(BlockDriverState *bs,
2581
                            int64_t offset, int64_t length,
2582
                            int addend)
2583
{
2584
    BDRVQcowState *s = bs->opaque;
2585
    int64_t start, last, cluster_offset;
2586
    int64_t refcount_block_offset = 0;
2587
    int64_t table_index = -1, old_table_index;
2588
    int first_index = -1, last_index = -1;
2589

  
2590
#ifdef DEBUG_ALLOC2
2591
    printf("update_refcount: offset=%lld size=%lld addend=%d\n",
2592
           offset, length, addend);
2593
#endif
2594
    if (length <= 0)
2595
        return -EINVAL;
2596
    start = offset & ~(s->cluster_size - 1);
2597
    last = (offset + length - 1) & ~(s->cluster_size - 1);
2598
    for(cluster_offset = start; cluster_offset <= last;
2599
        cluster_offset += s->cluster_size)
2600
    {
2601
        int block_index, refcount;
2602
        int64_t cluster_index = cluster_offset >> s->cluster_bits;
2603

  
2604
        /* Only write refcount block to disk when we are done with it */
2605
        old_table_index = table_index;
2606
        table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
2607
        if ((old_table_index >= 0) && (table_index != old_table_index)) {
2608
            size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
2609
            if (bdrv_pwrite(s->hd,
2610
                refcount_block_offset + (first_index << REFCOUNT_SHIFT),
2611
                &s->refcount_block_cache[first_index], size) != size)
2612
            {
2613
                return -EIO;
2614
            }
2615

  
2616
            first_index = -1;
2617
            last_index = -1;
2618
        }
2619

  
2620
        /* Load the refcount block and allocate it if needed */
2621
        refcount_block_offset = alloc_refcount_block(bs, cluster_index);
2622
        if (refcount_block_offset < 0) {
2623
            return refcount_block_offset;
2624
        }
2625

  
2626
        /* we can update the count and save it */
2627
        block_index = cluster_index &
2628
            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
2629
        if (first_index == -1 || block_index < first_index) {
2630
            first_index = block_index;
2631
        }
2632
        if (block_index > last_index) {
2633
            last_index = block_index;
2634
        }
2635

  
2636
        refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
2637
        refcount += addend;
2638
        if (refcount < 0 || refcount > 0xffff)
2639
            return -EINVAL;
2640
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
2641
            s->free_cluster_index = cluster_index;
2642
        }
2643
        s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
2644
    }
2645

  
2646
    /* Write last changed block to disk */
2647
    if (refcount_block_offset != 0) {
2648
        size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
2649
        if (bdrv_pwrite(s->hd,
2650
            refcount_block_offset + (first_index << REFCOUNT_SHIFT),
2651
            &s->refcount_block_cache[first_index], size) != size)
2652
        {
2653
            return -EIO;
2654
        }
2655
    }
2656

  
2657
    return 0;
2658
}
2659

  
2660
/*
2661
 * Increases the refcount for a range of clusters in a given refcount table.
2662
 * This is used to construct a temporary refcount table out of L1 and L2 tables
2663
 * which can be compared the the refcount table saved in the image.
2664
 *
2665
 * Returns the number of errors in the image that were found
2666
 */
2667
static int inc_refcounts(BlockDriverState *bs,
2668
                          uint16_t *refcount_table,
2669
                          int refcount_table_size,
2670
                          int64_t offset, int64_t size)
2671
{
2672
    BDRVQcowState *s = bs->opaque;
2673
    int64_t start, last, cluster_offset;
2674
    int k;
2675
    int errors = 0;
2676

  
2677
    if (size <= 0)
2678
        return 0;
2679

  
2680
    start = offset & ~(s->cluster_size - 1);
2681
    last = (offset + size - 1) & ~(s->cluster_size - 1);
2682
    for(cluster_offset = start; cluster_offset <= last;
2683
        cluster_offset += s->cluster_size) {
2684
        k = cluster_offset >> s->cluster_bits;
2685
        if (k < 0 || k >= refcount_table_size) {
2686
            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
2687
                cluster_offset);
2688
            errors++;
2689
        } else {
2690
            if (++refcount_table[k] == 0) {
2691
                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
2692
                    "\n", cluster_offset);
2693
                errors++;
2694
            }
2695
        }
2696
    }
2697

  
2698
    return errors;
2699
}
2700

  
2701
/*
2702
 * Increases the refcount in the given refcount table for the all clusters
2703
 * referenced in the L2 table. While doing so, performs some checks on L2
2704
 * entries.
2705
 *
2706
 * Returns the number of errors found by the checks or -errno if an internal
2707
 * error occurred.
2708
 */
2709
static int check_refcounts_l2(BlockDriverState *bs,
2710
    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
2711
    int check_copied)
2712
{
2713
    BDRVQcowState *s = bs->opaque;
2714
    uint64_t *l2_table, offset;
2715
    int i, l2_size, nb_csectors, refcount;
2716
    int errors = 0;
2717

  
2718
    /* Read L2 table from disk */
2719
    l2_size = s->l2_size * sizeof(uint64_t);
2720
    l2_table = qemu_malloc(l2_size);
2721

  
2722
    if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
2723
        goto fail;
2724

  
2725
    /* Do the actual checks */
2726
    for(i = 0; i < s->l2_size; i++) {
2727
        offset = be64_to_cpu(l2_table[i]);
2728
        if (offset != 0) {
2729
            if (offset & QCOW_OFLAG_COMPRESSED) {
2730
                /* Compressed clusters don't have QCOW_OFLAG_COPIED */
2731
                if (offset & QCOW_OFLAG_COPIED) {
2732
                    fprintf(stderr, "ERROR: cluster %" PRId64 ": "
2733
                        "copied flag must never be set for compressed "
2734
                        "clusters\n", offset >> s->cluster_bits);
2735
                    offset &= ~QCOW_OFLAG_COPIED;
2736
                    errors++;
2737
                }
2738

  
2739
                /* Mark cluster as used */
2740
                nb_csectors = ((offset >> s->csize_shift) &
2741
                               s->csize_mask) + 1;
2742
                offset &= s->cluster_offset_mask;
2743
                errors += inc_refcounts(bs, refcount_table,
2744
                              refcount_table_size,
2745
                              offset & ~511, nb_csectors * 512);
2746
            } else {
2747
                /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
2748
                if (check_copied) {
2749
                    uint64_t entry = offset;
2750
                    offset &= ~QCOW_OFLAG_COPIED;
2751
                    refcount = get_refcount(bs, offset >> s->cluster_bits);
2752
                    if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
2753
                        fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
2754
                            PRIx64 " refcount=%d\n", entry, refcount);
2755
                        errors++;
2756
                    }
2757
                }
2758

  
2759
                /* Mark cluster as used */
2760
                offset &= ~QCOW_OFLAG_COPIED;
2761
                errors += inc_refcounts(bs, refcount_table,
2762
                              refcount_table_size,
2763
                              offset, s->cluster_size);
2764

  
2765
                /* Correct offsets are cluster aligned */
2766
                if (offset & (s->cluster_size - 1)) {
2767
                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
2768
                        "properly aligned; L2 entry corrupted.\n", offset);
2769
                    errors++;
2770
                }
2771
            }
2772
        }
2773
    }
2774

  
2775
    qemu_free(l2_table);
2776
    return errors;
2777

  
2778
fail:
2779
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
2780
    qemu_free(l2_table);
2781
    return -EIO;
2782
}
2783

  
2784
/*
2785
 * Increases the refcount for the L1 table, its L2 tables and all referenced
2786
 * clusters in the given refcount table. While doing so, performs some checks
2787
 * on L1 and L2 entries.
2788
 *
2789
 * Returns the number of errors found by the checks or -errno if an internal
2790
 * error occurred.
2791
 */
2792
static int check_refcounts_l1(BlockDriverState *bs,
2793
                              uint16_t *refcount_table,
2794
                              int refcount_table_size,
2795
                              int64_t l1_table_offset, int l1_size,
2796
                              int check_copied)
2797
{
2798
    BDRVQcowState *s = bs->opaque;
2799
    uint64_t *l1_table, l2_offset, l1_size2;
2800
    int i, refcount, ret;
2801
    int errors = 0;
2802

  
2803
    l1_size2 = l1_size * sizeof(uint64_t);
2804

  
2805
    /* Mark L1 table as used */
2806
    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
2807
                  l1_table_offset, l1_size2);
2808

  
2809
    /* Read L1 table entries from disk */
2810
    l1_table = qemu_malloc(l1_size2);
2811
    if (bdrv_pread(s->hd, l1_table_offset,
2812
                   l1_table, l1_size2) != l1_size2)
2813
        goto fail;
2814
    for(i = 0;i < l1_size; i++)
2815
        be64_to_cpus(&l1_table[i]);
2816

  
2817
    /* Do the actual checks */
2818
    for(i = 0; i < l1_size; i++) {
2819
        l2_offset = l1_table[i];
2820
        if (l2_offset) {
2821
            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
2822
            if (check_copied) {
2823
                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
2824
                    >> s->cluster_bits);
2825
                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
2826
                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
2827
                        " refcount=%d\n", l2_offset, refcount);
2828
                    errors++;
2829
                }
2830
            }
2831

  
2832
            /* Mark L2 table as used */
2833
            l2_offset &= ~QCOW_OFLAG_COPIED;
2834
            errors += inc_refcounts(bs, refcount_table,
2835
                          refcount_table_size,
2836
                          l2_offset,
2837
                          s->cluster_size);
2838

  
2839
            /* L2 tables are cluster aligned */
2840
            if (l2_offset & (s->cluster_size - 1)) {
2841
                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
2842
                    "cluster aligned; L1 entry corrupted\n", l2_offset);
2843
                errors++;
2844
            }
2845

  
2846
            /* Process and check L2 entries */
2847
            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
2848
                l2_offset, check_copied);
2849
            if (ret < 0) {
2850
                goto fail;
2851
            }
2852
            errors += ret;
2853
        }
2854
    }
2855
    qemu_free(l1_table);
2856
    return errors;
2857

  
2858
fail:
2859
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
2860
    qemu_free(l1_table);
2861
    return -EIO;
2862
}
2863

  
2864
/*
2865
 * Checks an image for refcount consistency.
2866
 *
2867
 * Returns 0 if no errors are found, the number of errors in case the image is
2868
 * detected as corrupted, and -errno when an internal error occured.
2869
 */
2870
static int check_refcounts(BlockDriverState *bs)
2871
{
2872
    BDRVQcowState *s = bs->opaque;
2873
    int64_t size;
2874
    int nb_clusters, refcount1, refcount2, i;
2875
    QCowSnapshot *sn;
2876
    uint16_t *refcount_table;
2877
    int ret, errors = 0;
2878

  
2879
    size = bdrv_getlength(s->hd);
2880
    nb_clusters = size_to_clusters(s, size);
2881
    refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
2882

  
2883
    /* header */
2884
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
2885
                  0, s->cluster_size);
2886

  
2887
    /* current L1 table */
2888
    ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
2889
                       s->l1_table_offset, s->l1_size, 1);
2890
    if (ret < 0) {
2891
        return ret;
2892
    }
2893
    errors += ret;
2894

  
2895
    /* snapshots */
2896
    for(i = 0; i < s->nb_snapshots; i++) {
2897
        sn = s->snapshots + i;
2898
        check_refcounts_l1(bs, refcount_table, nb_clusters,
2899
                           sn->l1_table_offset, sn->l1_size, 0);
2900
    }
2901
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
2902
                  s->snapshots_offset, s->snapshots_size);
2903

  
2904
    /* refcount data */
2905
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
2906
                  s->refcount_table_offset,
2907
                  s->refcount_table_size * sizeof(uint64_t));
2908
    for(i = 0; i < s->refcount_table_size; i++) {
2909
        int64_t offset;
2910
        offset = s->refcount_table[i];
2911
        if (offset != 0) {
2912
            errors += inc_refcounts(bs, refcount_table, nb_clusters,
2913
                          offset, s->cluster_size);
2914
        }
2915
    }
2916

  
2917
    /* compare ref counts */
2918
    for(i = 0; i < nb_clusters; i++) {
2919
        refcount1 = get_refcount(bs, i);
2920
        refcount2 = refcount_table[i];
2921
        if (refcount1 != refcount2) {
2922
            fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
2923
                   i, refcount1, refcount2);
2924
            errors++;
2925
        }
2926
    }
2927

  
2928
    qemu_free(refcount_table);
2929

  
2930
    return errors;
2931
}
2932

  
2933 2066
static int qcow_check(BlockDriverState *bs)
2934 2067
{
2935 2068
    return check_refcounts(bs);

Also available in: Unified diff