Statistics
| Branch: | Revision:

root / block / qcow2-refcount.c @ 45aba42f

History | View | Annotate | Download (27.6 kB)

1
/*
2
 * Block driver for the QCOW version 2 format
3
 *
4
 * Copyright (c) 2004-2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

    
25
#include "qemu-common.h"
26
#include "block_int.h"
27
#include "block/qcow2.h"
28

    
29
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
30
static int update_refcount(BlockDriverState *bs,
31
                            int64_t offset, int64_t length,
32
                            int addend);
33

    
34
/*********************************************************/
35
/* refcount handling */
36

    
37
/*
 * Allocates the refcount block cache (one cluster) and the in-memory refcount
 * table and, when the table has entries, reads it from the image file and
 * converts every entry to host byte order.
 *
 * Returns 0 on success and -EIO when the table cannot be read completely.
 * The buffers are not released here on failure; refcount_close() frees both.
 * NOTE(review): presumably the caller runs refcount_close() on its error
 * path -- confirm.
 */
int refcount_init(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int ret, refcount_table_size2, i;

    s->refcount_block_cache = qemu_malloc(s->cluster_size);
    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
    s->refcount_table = qemu_malloc(refcount_table_size2);
    if (s->refcount_table_size > 0) {
        ret = bdrv_pread(s->hd, s->refcount_table_offset,
                         s->refcount_table, refcount_table_size2);
        if (ret != refcount_table_size2) {
            /* a short or failed read is an I/O error, not memory exhaustion */
            return -EIO;
        }
        for (i = 0; i < s->refcount_table_size; i++) {
            be64_to_cpus(&s->refcount_table[i]);
        }
    }
    return 0;
}
57

    
58
/* Releases the two buffers that refcount_init() allocates. */
void refcount_close(BlockDriverState *bs)
{
    BDRVQcowState *state = bs->opaque;

    qemu_free(state->refcount_table);
    qemu_free(state->refcount_block_cache);
}
64

    
65

    
66
/*
 * Reads the cluster-sized refcount block located at refcount_block_offset
 * into the refcount block cache.
 *
 * Returns 0 on success; the cache is then tagged with the block's offset.
 * Returns -EIO when fewer than cluster_size bytes were read, in which case
 * the recorded cache offset is left unchanged.
 */
static int load_refcount_block(BlockDriverState *bs,
                               int64_t refcount_block_offset)
{
    BDRVQcowState *s = bs->opaque;
    int nread;

    nread = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
                       s->cluster_size);
    if (nread == s->cluster_size) {
        s->refcount_block_cache_offset = refcount_block_offset;
        return 0;
    }
    return -EIO;
}
78

    
79
/*
 * Looks up the refcount of the cluster with the given index.
 *
 * Returns 0 when the index lies beyond the refcount table or when no
 * refcount block is assigned to it. Returns 1 ("allocated") when the
 * refcount block cannot be loaded. Otherwise returns the stored value.
 */
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
{
    BDRVQcowState *s = bs->opaque;
    int table_idx, entry_idx;
    int64_t block_offset;

    table_idx = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (table_idx >= s->refcount_table_size) {
        return 0;
    }

    block_offset = s->refcount_table[table_idx];
    if (block_offset == 0) {
        return 0;
    }

    /* better than nothing: report the cluster as allocated on read error */
    if (block_offset != s->refcount_block_cache_offset &&
        load_refcount_block(bs, block_offset) < 0) {
        return 1;
    }

    entry_idx = cluster_index &
        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
    return be16_to_cpu(s->refcount_block_cache[entry_idx]);
}
100

    
101
/*
 * Enlarges the refcount table so that it holds at least min_size entries.
 *
 * The table size in clusters grows by a factor of about 1.5 (starting from
 * one cluster) until it is large enough. The new table is written to newly
 * allocated clusters, then the header fields starting at
 * refcount_table_offset are rewritten (8-byte offset followed by the 4-byte
 * cluster count), and finally the in-memory state is switched over and the
 * old table's clusters are released.
 *
 * Returns 0 on success (including when no growth is needed) and -EIO when
 * one of the writes fails.
 */
static int grow_refcount_table(BlockDriverState *bs, int min_size)
{
    BDRVQcowState *s = bs->opaque;
    int new_size, new_bytes, nb_table_clusters, i, ret;
    uint64_t *new_table;
    int64_t table_offset;
    uint8_t header_fields[12];
    int old_table_size;
    int64_t old_table_offset;

    if (min_size <= s->refcount_table_size) {
        return 0;
    }

    /* compute new table size; one cluster holds 2^(cluster_bits - 3)
       8-byte entries */
    nb_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
    do {
        if (nb_table_clusters == 0) {
            nb_table_clusters = 1;
        } else {
            nb_table_clusters = (nb_table_clusters * 3 + 1) / 2;
        }
        new_size = nb_table_clusters << (s->cluster_bits - 3);
    } while (new_size < min_size);
#ifdef DEBUG_ALLOC2
    printf("grow_refcount_table from %d to %d\n",
           s->refcount_table_size,
           new_size);
#endif

    /* build the big-endian on-disk image of the enlarged table */
    new_bytes = new_size * sizeof(uint64_t);
    new_table = qemu_mallocz(new_bytes);
    memcpy(new_table, s->refcount_table,
           s->refcount_table_size * sizeof(uint64_t));
    for (i = 0; i < s->refcount_table_size; i++) {
        cpu_to_be64s(&new_table[i]);
    }

    /* Note: we cannot update the refcount now to avoid recursion */
    table_offset = alloc_clusters_noref(bs, new_bytes);
    ret = bdrv_pwrite(s->hd, table_offset, new_table, new_bytes);
    if (ret != new_bytes) {
        goto fail;
    }

    /* back to host byte order for in-memory use */
    for (i = 0; i < s->refcount_table_size; i++) {
        be64_to_cpus(&new_table[i]);
    }

    /* point the image header at the new table */
    cpu_to_be64w((uint64_t*)header_fields, table_offset);
    cpu_to_be32w((uint32_t*)(header_fields + 8), nb_table_clusters);
    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
                    header_fields, sizeof(header_fields)) !=
        sizeof(header_fields)) {
        goto fail;
    }

    /* switch the in-memory state over to the new table */
    old_table_offset = s->refcount_table_offset;
    old_table_size = s->refcount_table_size;
    qemu_free(s->refcount_table);
    s->refcount_table = new_table;
    s->refcount_table_size = new_size;
    s->refcount_table_offset = table_offset;

    /* now the refcounts can be updated: reference the new table, drop the
       old one */
    update_refcount(bs, table_offset, new_bytes, 1);
    free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
    return 0;

 fail:
    /* NOTE(review): the clusters came from alloc_clusters_noref() and were
       never referenced, so this looks like a decrement of a zero refcount
       -- confirm */
    free_clusters(bs, table_offset, new_bytes);
    qemu_free(new_table);
    return -EIO;
}
164

    
165

    
166
/*
 * Makes sure the refcount block covering cluster_index exists and is held in
 * the refcount block cache, growing the refcount table and creating a new,
 * zero-filled refcount block when necessary.
 *
 * Returns the offset of the refcount block in the image file, or a negative
 * errno value on failure.
 */
static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
{
    BDRVQcowState *s = bs->opaque;
    int64_t offset, refcount_block_offset;
    int ret, refcount_table_index;
    uint64_t data64;

    /* Find L1 index and grow refcount table if needed */
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (refcount_table_index >= s->refcount_table_size) {
        ret = grow_refcount_table(bs, refcount_table_index + 1);
        if (ret < 0) {
            return ret;
        }
    }

    /* An existing block only has to be brought into the cache */
    refcount_block_offset = s->refcount_table[refcount_table_index];
    if (refcount_block_offset) {
        if (refcount_block_offset != s->refcount_block_cache_offset &&
            load_refcount_block(bs, refcount_block_offset) < 0) {
            return -EIO;
        }
        return refcount_block_offset;
    }

    /* create a new refcount block */
    /* Note: we cannot update the refcount now to avoid recursion */
    offset = alloc_clusters_noref(bs, s->cluster_size);
    if (offset < 0) {
        return offset;
    }

    /*
     * The cache buffer is about to be overwritten with zeros, so it must no
     * longer be associated with the block it held before; otherwise a failed
     * write below would leave zeroed refcounts tagged with the old block's
     * offset. Offset 0 never matches a lookup because every lookup skips
     * table entries that are 0 before comparing against the cache offset.
     */
    s->refcount_block_cache_offset = 0;
    memset(s->refcount_block_cache, 0, s->cluster_size);
    ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
    if (ret != s->cluster_size) {
        return -EIO;
    }
    s->refcount_block_cache_offset = offset;

    /* Hook the block into the on-disk table first and into the in-memory
       table only once that write succeeded, so both stay in agreement */
    data64 = cpu_to_be64(offset);
    ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
                      refcount_table_index * sizeof(uint64_t),
                      &data64, sizeof(data64));
    if (ret != sizeof(data64)) {
        return -EIO;
    }
    s->refcount_table[refcount_table_index] = offset;

    /* Now the new block's own cluster can be referenced */
    ret = update_refcount(bs, offset, s->cluster_size, 1);
    if (ret < 0) {
        return ret;
    }

    /*
     * The cluster holding the new block may itself be covered by a different
     * refcount block; in that case the call above left that other block in
     * the cache. The caller expects the returned block to be the cached one.
     */
    if (s->refcount_block_cache_offset != offset &&
        load_refcount_block(bs, offset) < 0) {
        return -EIO;
    }

    return offset;
}
211

    
212
/* XXX: cache several refcount block clusters ? */
213
static int update_refcount(BlockDriverState *bs,
214
                            int64_t offset, int64_t length,
215
                            int addend)
216
{
217
    BDRVQcowState *s = bs->opaque;
218
    int64_t start, last, cluster_offset;
219
    int64_t refcount_block_offset = 0;
220
    int64_t table_index = -1, old_table_index;
221
    int first_index = -1, last_index = -1;
222

    
223
#ifdef DEBUG_ALLOC2
224
    printf("update_refcount: offset=%lld size=%lld addend=%d\n",
225
           offset, length, addend);
226
#endif
227
    if (length <= 0)
228
        return -EINVAL;
229
    start = offset & ~(s->cluster_size - 1);
230
    last = (offset + length - 1) & ~(s->cluster_size - 1);
231
    for(cluster_offset = start; cluster_offset <= last;
232
        cluster_offset += s->cluster_size)
233
    {
234
        int block_index, refcount;
235
        int64_t cluster_index = cluster_offset >> s->cluster_bits;
236

    
237
        /* Only write refcount block to disk when we are done with it */
238
        old_table_index = table_index;
239
        table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
240
        if ((old_table_index >= 0) && (table_index != old_table_index)) {
241
            size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
242
            if (bdrv_pwrite(s->hd,
243
                refcount_block_offset + (first_index << REFCOUNT_SHIFT),
244
                &s->refcount_block_cache[first_index], size) != size)
245
            {
246
                return -EIO;
247
            }
248

    
249
            first_index = -1;
250
            last_index = -1;
251
        }
252

    
253
        /* Load the refcount block and allocate it if needed */
254
        refcount_block_offset = alloc_refcount_block(bs, cluster_index);
255
        if (refcount_block_offset < 0) {
256
            return refcount_block_offset;
257
        }
258

    
259
        /* we can update the count and save it */
260
        block_index = cluster_index &
261
            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
262
        if (first_index == -1 || block_index < first_index) {
263
            first_index = block_index;
264
        }
265
        if (block_index > last_index) {
266
            last_index = block_index;
267
        }
268

    
269
        refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
270
        refcount += addend;
271
        if (refcount < 0 || refcount > 0xffff)
272
            return -EINVAL;
273
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
274
            s->free_cluster_index = cluster_index;
275
        }
276
        s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
277
    }
278

    
279
    /* Write last changed block to disk */
280
    if (refcount_block_offset != 0) {
281
        size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
282
        if (bdrv_pwrite(s->hd,
283
            refcount_block_offset + (first_index << REFCOUNT_SHIFT),
284
            &s->refcount_block_cache[first_index], size) != size)
285
        {
286
            return -EIO;
287
        }
288
    }
289

    
290
    return 0;
291
}
292

    
293
/* addend must be 1 or -1 */
294
static int update_cluster_refcount(BlockDriverState *bs,
295
                                   int64_t cluster_index,
296
                                   int addend)
297
{
298
    BDRVQcowState *s = bs->opaque;
299
    int ret;
300

    
301
    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
302
    if (ret < 0) {
303
        return ret;
304
    }
305

    
306
    return get_refcount(bs, cluster_index);
307
}
308

    
309

    
310

    
311
/*********************************************************/
312
/* cluster allocation functions */
313

    
314

    
315

    
316
/* return < 0 if error */
317
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
318
{
319
    BDRVQcowState *s = bs->opaque;
320
    int i, nb_clusters;
321

    
322
    nb_clusters = size_to_clusters(s, size);
323
retry:
324
    for(i = 0; i < nb_clusters; i++) {
325
        int64_t i = s->free_cluster_index++;
326
        if (get_refcount(bs, i) != 0)
327
            goto retry;
328
    }
329
#ifdef DEBUG_ALLOC2
330
    printf("alloc_clusters: size=%lld -> %lld\n",
331
            size,
332
            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
333
#endif
334
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
335
}
336

    
337
/*
 * Allocates enough contiguous clusters for size bytes and raises their
 * refcount by one.
 *
 * Returns the byte offset of the first allocated cluster, or a negative
 * errno value when the search fails or the refcounts cannot be updated.
 */
int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
{
    int64_t offset;
    int ret;

    offset = alloc_clusters_noref(bs, size);
    if (offset < 0) {
        return offset;
    }

    /* update_refcount() rejects non-positive lengths with -EINVAL before
       doing anything; an empty request has no refcounts to raise and keeps
       returning the offset */
    if (size > 0) {
        ret = update_refcount(bs, offset, size, 1);
        if (ret < 0) {
            /* do not hand out clusters whose refcount was not raised */
            return ret;
        }
    }
    return offset;
}
345

    
346
/* only used to allocate compressed sectors. We try to allocate
347
   contiguous sectors. size must be <= cluster_size */
348
int64_t alloc_bytes(BlockDriverState *bs, int size)
349
{
350
    BDRVQcowState *s = bs->opaque;
351
    int64_t offset, cluster_offset;
352
    int free_in_cluster;
353

    
354
    assert(size > 0 && size <= s->cluster_size);
355
    if (s->free_byte_offset == 0) {
356
        s->free_byte_offset = alloc_clusters(bs, s->cluster_size);
357
    }
358
 redo:
359
    free_in_cluster = s->cluster_size -
360
        (s->free_byte_offset & (s->cluster_size - 1));
361
    if (size <= free_in_cluster) {
362
        /* enough space in current cluster */
363
        offset = s->free_byte_offset;
364
        s->free_byte_offset += size;
365
        free_in_cluster -= size;
366
        if (free_in_cluster == 0)
367
            s->free_byte_offset = 0;
368
        if ((offset & (s->cluster_size - 1)) != 0)
369
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
370
    } else {
371
        offset = alloc_clusters(bs, s->cluster_size);
372
        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
373
        if ((cluster_offset + s->cluster_size) == offset) {
374
            /* we are lucky: contiguous data */
375
            offset = s->free_byte_offset;
376
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
377
            s->free_byte_offset += size;
378
        } else {
379
            s->free_byte_offset = offset;
380
            goto redo;
381
        }
382
    }
383
    return offset;
384
}
385

    
386
/*
 * Drops one reference from every cluster touched by the byte range
 * [offset, offset + size). The result of update_refcount() is discarded
 * because this function has no return value.
 */
void free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size)
{
    const int addend = -1;

    update_refcount(bs, offset, size, addend);
}
391

    
392
/*
393
 * free_any_clusters
394
 *
395
 * free clusters according to its type: compressed or not
396
 *
397
 */
398

    
399
void free_any_clusters(BlockDriverState *bs,
400
    uint64_t cluster_offset, int nb_clusters)
401
{
402
    BDRVQcowState *s = bs->opaque;
403

    
404
    /* free the cluster */
405

    
406
    if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
407
        int nb_csectors;
408
        nb_csectors = ((cluster_offset >> s->csize_shift) &
409
                       s->csize_mask) + 1;
410
        free_clusters(bs, (cluster_offset & s->cluster_offset_mask) & ~511,
411
                      nb_csectors * 512);
412
        return;
413
    }
414

    
415
    free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
416

    
417
    return;
418
}
419

    
420

    
421

    
422
/*********************************************************/
423
/* snapshots and image creation */
424

    
425

    
426

    
427
/*
 * Image creation helper: increments, in the creation state's refcount block,
 * the big-endian refcount of every cluster touched by the byte range
 * [offset, offset + size).
 */
void create_refcount_update(QCowCreateState *s, int64_t offset, int64_t size)
{
    int64_t cur = offset & ~(s->cluster_size - 1);
    int64_t end = (offset + size - 1)  & ~(s->cluster_size - 1);

    while (cur <= end) {
        uint16_t *entry = &s->refcount_block[cur >> s->cluster_bits];
        int count = be16_to_cpu(*entry);

        count++;
        *entry = cpu_to_be16(count);
        cur += s->cluster_size;
    }
}
443

    
444
/* update the refcounts of snapshots and the copied flag */
445
int update_snapshot_refcount(BlockDriverState *bs,
446
                             int64_t l1_table_offset,
447
                             int l1_size,
448
                             int addend)
449
{
450
    BDRVQcowState *s = bs->opaque;
451
    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
452
    int64_t old_offset, old_l2_offset;
453
    int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
454

    
455
    l2_cache_reset(bs);
456

    
457
    l2_table = NULL;
458
    l1_table = NULL;
459
    l1_size2 = l1_size * sizeof(uint64_t);
460
    l1_allocated = 0;
461
    if (l1_table_offset != s->l1_table_offset) {
462
        l1_table = qemu_malloc(l1_size2);
463
        l1_allocated = 1;
464
        if (bdrv_pread(s->hd, l1_table_offset,
465
                       l1_table, l1_size2) != l1_size2)
466
            goto fail;
467
        for(i = 0;i < l1_size; i++)
468
            be64_to_cpus(&l1_table[i]);
469
    } else {
470
        assert(l1_size == s->l1_size);
471
        l1_table = s->l1_table;
472
        l1_allocated = 0;
473
    }
474

    
475
    l2_size = s->l2_size * sizeof(uint64_t);
476
    l2_table = qemu_malloc(l2_size);
477
    l1_modified = 0;
478
    for(i = 0; i < l1_size; i++) {
479
        l2_offset = l1_table[i];
480
        if (l2_offset) {
481
            old_l2_offset = l2_offset;
482
            l2_offset &= ~QCOW_OFLAG_COPIED;
483
            l2_modified = 0;
484
            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
485
                goto fail;
486
            for(j = 0; j < s->l2_size; j++) {
487
                offset = be64_to_cpu(l2_table[j]);
488
                if (offset != 0) {
489
                    old_offset = offset;
490
                    offset &= ~QCOW_OFLAG_COPIED;
491
                    if (offset & QCOW_OFLAG_COMPRESSED) {
492
                        nb_csectors = ((offset >> s->csize_shift) &
493
                                       s->csize_mask) + 1;
494
                        if (addend != 0)
495
                            update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
496
                                            nb_csectors * 512, addend);
497
                        /* compressed clusters are never modified */
498
                        refcount = 2;
499
                    } else {
500
                        if (addend != 0) {
501
                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
502
                        } else {
503
                            refcount = get_refcount(bs, offset >> s->cluster_bits);
504
                        }
505
                    }
506

    
507
                    if (refcount == 1) {
508
                        offset |= QCOW_OFLAG_COPIED;
509
                    }
510
                    if (offset != old_offset) {
511
                        l2_table[j] = cpu_to_be64(offset);
512
                        l2_modified = 1;
513
                    }
514
                }
515
            }
516
            if (l2_modified) {
517
                if (bdrv_pwrite(s->hd,
518
                                l2_offset, l2_table, l2_size) != l2_size)
519
                    goto fail;
520
            }
521

    
522
            if (addend != 0) {
523
                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
524
            } else {
525
                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
526
            }
527
            if (refcount == 1) {
528
                l2_offset |= QCOW_OFLAG_COPIED;
529
            }
530
            if (l2_offset != old_l2_offset) {
531
                l1_table[i] = l2_offset;
532
                l1_modified = 1;
533
            }
534
        }
535
    }
536
    if (l1_modified) {
537
        for(i = 0; i < l1_size; i++)
538
            cpu_to_be64s(&l1_table[i]);
539
        if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
540
                        l1_size2) != l1_size2)
541
            goto fail;
542
        for(i = 0; i < l1_size; i++)
543
            be64_to_cpus(&l1_table[i]);
544
    }
545
    if (l1_allocated)
546
        qemu_free(l1_table);
547
    qemu_free(l2_table);
548
    return 0;
549
 fail:
550
    if (l1_allocated)
551
        qemu_free(l1_table);
552
    qemu_free(l2_table);
553
    return -EIO;
554
}
555

    
556

    
557

    
558

    
559
/*********************************************************/
560
/* refcount checking functions */
561

    
562

    
563

    
564
/*
565
 * Increases the refcount for a range of clusters in a given refcount table.
566
 * This is used to construct a temporary refcount table out of L1 and L2 tables
567
 * which can be compared the the refcount table saved in the image.
568
 *
569
 * Returns the number of errors in the image that were found
570
 */
571
static int inc_refcounts(BlockDriverState *bs,
572
                          uint16_t *refcount_table,
573
                          int refcount_table_size,
574
                          int64_t offset, int64_t size)
575
{
576
    BDRVQcowState *s = bs->opaque;
577
    int64_t start, last, cluster_offset;
578
    int k;
579
    int errors = 0;
580

    
581
    if (size <= 0)
582
        return 0;
583

    
584
    start = offset & ~(s->cluster_size - 1);
585
    last = (offset + size - 1) & ~(s->cluster_size - 1);
586
    for(cluster_offset = start; cluster_offset <= last;
587
        cluster_offset += s->cluster_size) {
588
        k = cluster_offset >> s->cluster_bits;
589
        if (k < 0 || k >= refcount_table_size) {
590
            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
591
                cluster_offset);
592
            errors++;
593
        } else {
594
            if (++refcount_table[k] == 0) {
595
                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
596
                    "\n", cluster_offset);
597
                errors++;
598
            }
599
        }
600
    }
601

    
602
    return errors;
603
}
604

    
605
/*
606
 * Increases the refcount in the given refcount table for the all clusters
607
 * referenced in the L2 table. While doing so, performs some checks on L2
608
 * entries.
609
 *
610
 * Returns the number of errors found by the checks or -errno if an internal
611
 * error occurred.
612
 */
613
static int check_refcounts_l2(BlockDriverState *bs,
614
    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
615
    int check_copied)
616
{
617
    BDRVQcowState *s = bs->opaque;
618
    uint64_t *l2_table, offset;
619
    int i, l2_size, nb_csectors, refcount;
620
    int errors = 0;
621

    
622
    /* Read L2 table from disk */
623
    l2_size = s->l2_size * sizeof(uint64_t);
624
    l2_table = qemu_malloc(l2_size);
625

    
626
    if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
627
        goto fail;
628

    
629
    /* Do the actual checks */
630
    for(i = 0; i < s->l2_size; i++) {
631
        offset = be64_to_cpu(l2_table[i]);
632
        if (offset != 0) {
633
            if (offset & QCOW_OFLAG_COMPRESSED) {
634
                /* Compressed clusters don't have QCOW_OFLAG_COPIED */
635
                if (offset & QCOW_OFLAG_COPIED) {
636
                    fprintf(stderr, "ERROR: cluster %" PRId64 ": "
637
                        "copied flag must never be set for compressed "
638
                        "clusters\n", offset >> s->cluster_bits);
639
                    offset &= ~QCOW_OFLAG_COPIED;
640
                    errors++;
641
                }
642

    
643
                /* Mark cluster as used */
644
                nb_csectors = ((offset >> s->csize_shift) &
645
                               s->csize_mask) + 1;
646
                offset &= s->cluster_offset_mask;
647
                errors += inc_refcounts(bs, refcount_table,
648
                              refcount_table_size,
649
                              offset & ~511, nb_csectors * 512);
650
            } else {
651
                /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
652
                if (check_copied) {
653
                    uint64_t entry = offset;
654
                    offset &= ~QCOW_OFLAG_COPIED;
655
                    refcount = get_refcount(bs, offset >> s->cluster_bits);
656
                    if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
657
                        fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
658
                            PRIx64 " refcount=%d\n", entry, refcount);
659
                        errors++;
660
                    }
661
                }
662

    
663
                /* Mark cluster as used */
664
                offset &= ~QCOW_OFLAG_COPIED;
665
                errors += inc_refcounts(bs, refcount_table,
666
                              refcount_table_size,
667
                              offset, s->cluster_size);
668

    
669
                /* Correct offsets are cluster aligned */
670
                if (offset & (s->cluster_size - 1)) {
671
                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
672
                        "properly aligned; L2 entry corrupted.\n", offset);
673
                    errors++;
674
                }
675
            }
676
        }
677
    }
678

    
679
    qemu_free(l2_table);
680
    return errors;
681

    
682
fail:
683
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
684
    qemu_free(l2_table);
685
    return -EIO;
686
}
687

    
688
/*
689
 * Increases the refcount for the L1 table, its L2 tables and all referenced
690
 * clusters in the given refcount table. While doing so, performs some checks
691
 * on L1 and L2 entries.
692
 *
693
 * Returns the number of errors found by the checks or -errno if an internal
694
 * error occurred.
695
 */
696
static int check_refcounts_l1(BlockDriverState *bs,
697
                              uint16_t *refcount_table,
698
                              int refcount_table_size,
699
                              int64_t l1_table_offset, int l1_size,
700
                              int check_copied)
701
{
702
    BDRVQcowState *s = bs->opaque;
703
    uint64_t *l1_table, l2_offset, l1_size2;
704
    int i, refcount, ret;
705
    int errors = 0;
706

    
707
    l1_size2 = l1_size * sizeof(uint64_t);
708

    
709
    /* Mark L1 table as used */
710
    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
711
                  l1_table_offset, l1_size2);
712

    
713
    /* Read L1 table entries from disk */
714
    l1_table = qemu_malloc(l1_size2);
715
    if (bdrv_pread(s->hd, l1_table_offset,
716
                   l1_table, l1_size2) != l1_size2)
717
        goto fail;
718
    for(i = 0;i < l1_size; i++)
719
        be64_to_cpus(&l1_table[i]);
720

    
721
    /* Do the actual checks */
722
    for(i = 0; i < l1_size; i++) {
723
        l2_offset = l1_table[i];
724
        if (l2_offset) {
725
            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
726
            if (check_copied) {
727
                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
728
                    >> s->cluster_bits);
729
                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
730
                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
731
                        " refcount=%d\n", l2_offset, refcount);
732
                    errors++;
733
                }
734
            }
735

    
736
            /* Mark L2 table as used */
737
            l2_offset &= ~QCOW_OFLAG_COPIED;
738
            errors += inc_refcounts(bs, refcount_table,
739
                          refcount_table_size,
740
                          l2_offset,
741
                          s->cluster_size);
742

    
743
            /* L2 tables are cluster aligned */
744
            if (l2_offset & (s->cluster_size - 1)) {
745
                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
746
                    "cluster aligned; L1 entry corrupted\n", l2_offset);
747
                errors++;
748
            }
749

    
750
            /* Process and check L2 entries */
751
            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
752
                l2_offset, check_copied);
753
            if (ret < 0) {
754
                goto fail;
755
            }
756
            errors += ret;
757
        }
758
    }
759
    qemu_free(l1_table);
760
    return errors;
761

    
762
fail:
763
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
764
    qemu_free(l1_table);
765
    return -EIO;
766
}
767

    
768
/*
769
 * Checks an image for refcount consistency.
770
 *
771
 * Returns 0 if no errors are found, the number of errors in case the image is
772
 * detected as corrupted, and -errno when an internal error occured.
773
 */
774
int check_refcounts(BlockDriverState *bs)
775
{
776
    BDRVQcowState *s = bs->opaque;
777
    int64_t size;
778
    int nb_clusters, refcount1, refcount2, i;
779
    QCowSnapshot *sn;
780
    uint16_t *refcount_table;
781
    int ret, errors = 0;
782

    
783
    size = bdrv_getlength(s->hd);
784
    nb_clusters = size_to_clusters(s, size);
785
    refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
786

    
787
    /* header */
788
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
789
                  0, s->cluster_size);
790

    
791
    /* current L1 table */
792
    ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
793
                       s->l1_table_offset, s->l1_size, 1);
794
    if (ret < 0) {
795
        return ret;
796
    }
797
    errors += ret;
798

    
799
    /* snapshots */
800
    for(i = 0; i < s->nb_snapshots; i++) {
801
        sn = s->snapshots + i;
802
        check_refcounts_l1(bs, refcount_table, nb_clusters,
803
                           sn->l1_table_offset, sn->l1_size, 0);
804
    }
805
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
806
                  s->snapshots_offset, s->snapshots_size);
807

    
808
    /* refcount data */
809
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
810
                  s->refcount_table_offset,
811
                  s->refcount_table_size * sizeof(uint64_t));
812
    for(i = 0; i < s->refcount_table_size; i++) {
813
        int64_t offset;
814
        offset = s->refcount_table[i];
815
        if (offset != 0) {
816
            errors += inc_refcounts(bs, refcount_table, nb_clusters,
817
                          offset, s->cluster_size);
818
        }
819
    }
820

    
821
    /* compare ref counts */
822
    for(i = 0; i < nb_clusters; i++) {
823
        refcount1 = get_refcount(bs, i);
824
        refcount2 = refcount_table[i];
825
        if (refcount1 != refcount2) {
826
            fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
827
                   i, refcount1, refcount2);
828
            errors++;
829
        }
830
    }
831

    
832
    qemu_free(refcount_table);
833

    
834
    return errors;
835
}
836