Revision f7d0fe02

b/Makefile
68 68
BLOCK_OBJS=cutils.o cache-utils.o qemu-malloc.o qemu-option.o module.o
69 69
BLOCK_OBJS+=block/cow.o block/qcow.o aes.o block/vmdk.o block/cloop.o
70 70
BLOCK_OBJS+=block/dmg.o block/bochs.o block/vpc.o block/vvfat.o
71
BLOCK_OBJS+=block/qcow2.o block/parallels.o block/nbd.o
71
BLOCK_OBJS+=block/qcow2.o block/qcow2-refcount.o
72
BLOCK_OBJS+=block/parallels.o block/nbd.o
72 73
BLOCK_OBJS+=nbd.o block.o aio.o
73 74

  
74 75
ifdef CONFIG_WIN32
b/block/qcow2-refcount.c
1
/*
2
 * Block driver for the QCOW version 2 format
3
 *
4
 * Copyright (c) 2004-2006 Fabrice Bellard
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7
 * of this software and associated documentation files (the "Software"), to deal
8
 * in the Software without restriction, including without limitation the rights
9
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
 * copies of the Software, and to permit persons to whom the Software is
11
 * furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
 * THE SOFTWARE.
23
 */
24

  
25
#include "qemu-common.h"
26
#include "block_int.h"
27
#include "block/qcow2.h"
28

  
29
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
30
static int update_refcount(BlockDriverState *bs,
31
                            int64_t offset, int64_t length,
32
                            int addend);
33

  
34
/*********************************************************/
35
/* refcount handling */
36

  
37
/*
 * Load the refcount table of the image into memory.
 *
 * Allocates the one-cluster refcount block cache and the in-memory copy of
 * the refcount table (qemu_malloc presumably aborts on OOM — TODO confirm;
 * the results are not checked here), then reads the on-disk table and
 * byteswaps its entries to host order.
 *
 * Returns 0 on success, -EIO if reading the table from disk fails.
 */
int refcount_init(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int ret, refcount_table_size2, i;

    s->refcount_block_cache = qemu_malloc(s->cluster_size);
    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
    s->refcount_table = qemu_malloc(refcount_table_size2);
    if (s->refcount_table_size > 0) {
        ret = bdrv_pread(s->hd, s->refcount_table_offset,
                         s->refcount_table, refcount_table_size2);
        if (ret != refcount_table_size2)
            goto fail;
        /* on-disk entries are big-endian; convert to host order */
        for(i = 0; i < s->refcount_table_size; i++)
            be64_to_cpus(&s->refcount_table[i]);
    }
    return 0;
 fail:
    /* The only way here is a failed/short bdrv_pread: this is an I/O
     * error, not an allocation failure (previously returned -ENOMEM,
     * which was misleading). */
    return -EIO;
}
57

  
58
/* Release the in-memory refcount state (table copy and block cache). */
void refcount_close(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;

    /* the two buffers are independent; free in either order */
    qemu_free(s->refcount_table);
    qemu_free(s->refcount_block_cache);
}
64

  
65

  
66
/*
 * Read one refcount block (exactly one cluster) from the image into the
 * shared refcount block cache and remember which block is cached.
 *
 * Returns 0 on success, -EIO on a failed or short read.
 */
static int load_refcount_block(BlockDriverState *bs,
                               int64_t refcount_block_offset)
{
    BDRVQcowState *s = bs->opaque;

    if (bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
                   s->cluster_size) != s->cluster_size) {
        return -EIO;
    }

    s->refcount_block_cache_offset = refcount_block_offset;
    return 0;
}
78

  
79
/*
 * Return the refcount of the given cluster.
 *
 * A cluster beyond the current refcount table, or one whose refcount block
 * has not been allocated, has refcount 0. If the block cannot be read,
 * 1 is reported ("allocated") as the safer answer.
 */
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
{
    BDRVQcowState *s = bs->opaque;
    int table_index, index_in_block;
    int64_t block_offset;

    table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (table_index >= s->refcount_table_size) {
        return 0;
    }

    block_offset = s->refcount_table[table_index];
    if (block_offset == 0) {
        return 0;
    }

    if (block_offset != s->refcount_block_cache_offset &&
        load_refcount_block(bs, block_offset) < 0) {
        /* better than nothing: return allocated if read error */
        return 1;
    }

    index_in_block = cluster_index &
        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
    return be16_to_cpu(s->refcount_block_cache[index_in_block]);
}
100

  
101
/*
 * Grow the refcount table so it has at least min_size entries.
 *
 * The new table is sized up in ~1.5x steps (whole clusters), written to a
 * freshly allocated area, activated by updating the image header, and only
 * then does the in-memory state switch over and the old table get freed.
 *
 * Returns 0 on success (also when no growth was needed), -EIO on any
 * write failure.
 */
static int grow_refcount_table(BlockDriverState *bs, int min_size)
{
    BDRVQcowState *s = bs->opaque;
    int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
    uint64_t *new_table;
    int64_t table_offset;
    uint8_t data[12];  /* header patch: 8-byte table offset + 4-byte cluster count */
    int old_table_size;
    int64_t old_table_offset;

    if (min_size <= s->refcount_table_size)
        return 0;
    /* compute new table size */
    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
    for(;;) {
        if (refcount_table_clusters == 0) {
            refcount_table_clusters = 1;
        } else {
            /* grow by roughly 50% each round until min_size fits */
            refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
        }
        new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
        if (min_size <= new_table_size)
            break;
    }
#ifdef DEBUG_ALLOC2
    printf("grow_refcount_table from %d to %d\n",
           s->refcount_table_size,
           new_table_size);
#endif
    new_table_size2 = new_table_size * sizeof(uint64_t);
    new_table = qemu_mallocz(new_table_size2);
    memcpy(new_table, s->refcount_table,
           s->refcount_table_size * sizeof(uint64_t));
    /* convert to big-endian for the on-disk copy... */
    for(i = 0; i < s->refcount_table_size; i++)
        cpu_to_be64s(&new_table[i]);
    /* Note: we cannot update the refcount now to avoid recursion */
    table_offset = alloc_clusters_noref(bs, new_table_size2);
    ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
    if (ret != new_table_size2)
        goto fail;
    /* ...and back to host order for the in-memory copy we keep */
    for(i = 0; i < s->refcount_table_size; i++)
        be64_to_cpus(&new_table[i]);

    /* Activate the new table by patching offset + cluster count into the
     * image header in a single write. */
    cpu_to_be64w((uint64_t*)data, table_offset);
    cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
                    data, sizeof(data)) != sizeof(data))
        goto fail;
    qemu_free(s->refcount_table);
    old_table_offset = s->refcount_table_offset;
    old_table_size = s->refcount_table_size;
    s->refcount_table = new_table;
    s->refcount_table_size = new_table_size;
    s->refcount_table_offset = table_offset;

    /* Now that the new table is live, account for its clusters and release
     * the old table's clusters. */
    update_refcount(bs, table_offset, new_table_size2, 1);
    free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
    return 0;
 fail:
    free_clusters(bs, table_offset, new_table_size2);
    qemu_free(new_table);
    return -EIO;
}
164

  
165

  
166
/*
 * Make sure the refcount block covering cluster_index exists and is loaded
 * in the refcount block cache.
 *
 * Grows the refcount table if the index falls beyond it, and allocates,
 * zeroes and registers a new refcount block when the table entry is empty.
 *
 * Returns the refcount block's offset in the image on success, or a
 * negative errno (-EINVAL on write failure, -EIO on read failure, or
 * whatever grow_refcount_table returned).
 */
static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
{
    BDRVQcowState *s = bs->opaque;
    int64_t offset, refcount_block_offset;
    int ret, refcount_table_index;
    uint64_t data64;

    /* Find L1 index and grow refcount table if needed */
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
    if (refcount_table_index >= s->refcount_table_size) {
        ret = grow_refcount_table(bs, refcount_table_index + 1);
        if (ret < 0)
            return ret;
    }

    /* Load or allocate the refcount block */
    refcount_block_offset = s->refcount_table[refcount_table_index];
    if (!refcount_block_offset) {
        /* create a new refcount block */
        /* Note: we cannot update the refcount now to avoid recursion */
        offset = alloc_clusters_noref(bs, s->cluster_size);
        /* write the zeroed block to disk before publishing it */
        memset(s->refcount_block_cache, 0, s->cluster_size);
        ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
        if (ret != s->cluster_size)
            return -EINVAL;
        s->refcount_table[refcount_table_index] = offset;
        /* persist the new table entry (big-endian on disk) */
        data64 = cpu_to_be64(offset);
        ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
                          refcount_table_index * sizeof(uint64_t),
                          &data64, sizeof(data64));
        if (ret != sizeof(data64))
            return -EINVAL;

        refcount_block_offset = offset;
        s->refcount_block_cache_offset = offset;
        /* the block is now reachable, so give its own cluster a refcount;
         * NOTE(review): the return value of update_refcount is ignored here */
        update_refcount(bs, offset, s->cluster_size, 1);
    } else {
        if (refcount_block_offset != s->refcount_block_cache_offset) {
            if (load_refcount_block(bs, refcount_block_offset) < 0)
                return -EIO;
        }
    }

    return refcount_block_offset;
}
211

  
212
/* XXX: cache several refcount block clusters ? */
/*
 * Add addend to the refcount of every cluster overlapping
 * [offset, offset + length).
 *
 * Modified refcount entries are batched in the block cache and flushed to
 * disk only when the walk leaves the current refcount block (and once more
 * at the end), using the first/last touched index to write a minimal span.
 *
 * Returns 0 on success; -EINVAL for a non-positive length or a refcount
 * leaving [0, 0xffff]; -EIO on write failure; or the negative error from
 * alloc_refcount_block.
 */
static int update_refcount(BlockDriverState *bs,
                            int64_t offset, int64_t length,
                            int addend)
{
    BDRVQcowState *s = bs->opaque;
    int64_t start, last, cluster_offset;
    int64_t refcount_block_offset = 0;
    int64_t table_index = -1, old_table_index;
    int first_index = -1, last_index = -1;

#ifdef DEBUG_ALLOC2
    printf("update_refcount: offset=%lld size=%lld addend=%d\n",
           offset, length, addend);
#endif
    if (length <= 0)
        return -EINVAL;
    /* round the range out to whole clusters */
    start = offset & ~(s->cluster_size - 1);
    last = (offset + length - 1) & ~(s->cluster_size - 1);
    for(cluster_offset = start; cluster_offset <= last;
        cluster_offset += s->cluster_size)
    {
        int block_index, refcount;
        int64_t cluster_index = cluster_offset >> s->cluster_bits;

        /* Only write refcount block to disk when we are done with it */
        old_table_index = table_index;
        table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
        if ((old_table_index >= 0) && (table_index != old_table_index)) {
            /* flush the dirty span [first_index, last_index] of the
             * previous block; a negative bdrv_pwrite result converts to a
             * huge size_t and is caught by the != comparison */
            size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
            if (bdrv_pwrite(s->hd,
                refcount_block_offset + (first_index << REFCOUNT_SHIFT),
                &s->refcount_block_cache[first_index], size) != size)
            {
                return -EIO;
            }

            first_index = -1;
            last_index = -1;
        }

        /* Load the refcount block and allocate it if needed */
        refcount_block_offset = alloc_refcount_block(bs, cluster_index);
        if (refcount_block_offset < 0) {
            return refcount_block_offset;
        }

        /* we can update the count and save it */
        block_index = cluster_index &
            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
        /* track the dirty span within the current block */
        if (first_index == -1 || block_index < first_index) {
            first_index = block_index;
        }
        if (block_index > last_index) {
            last_index = block_index;
        }

        refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
        refcount += addend;
        if (refcount < 0 || refcount > 0xffff)
            return -EINVAL;
        /* a cluster that just became free may move the allocation cursor back */
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
            s->free_cluster_index = cluster_index;
        }
        s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
    }

    /* Write last changed block to disk */
    if (refcount_block_offset != 0) {
        size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
        if (bdrv_pwrite(s->hd,
            refcount_block_offset + (first_index << REFCOUNT_SHIFT),
            &s->refcount_block_cache[first_index], size) != size)
        {
            return -EIO;
        }
    }

    return 0;
}
292

  
293
/* addend must be 1 or -1 */
294
static int update_cluster_refcount(BlockDriverState *bs,
295
                                   int64_t cluster_index,
296
                                   int addend)
297
{
298
    BDRVQcowState *s = bs->opaque;
299
    int ret;
300

  
301
    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
302
    if (ret < 0) {
303
        return ret;
304
    }
305

  
306
    return get_refcount(bs, cluster_index);
307
}
308

  
309

  
310

  
311
/*********************************************************/
312
/* cluster allocation functions */
313

  
314

  
315

  
316
/* return < 0 if error */
317
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
318
{
319
    BDRVQcowState *s = bs->opaque;
320
    int i, nb_clusters;
321

  
322
    nb_clusters = size_to_clusters(s, size);
323
retry:
324
    for(i = 0; i < nb_clusters; i++) {
325
        int64_t i = s->free_cluster_index++;
326
        if (get_refcount(bs, i) != 0)
327
            goto retry;
328
    }
329
#ifdef DEBUG_ALLOC2
330
    printf("alloc_clusters: size=%lld -> %lld\n",
331
            size,
332
            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
333
#endif
334
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
335
}
336

  
337
/*
 * Allocate clusters for at least size bytes and set their refcount to 1.
 *
 * Returns the byte offset of the allocation, or a negative errno if
 * updating the refcounts failed.
 *
 * Fixes: the update_refcount() return value was silently discarded, so a
 * refcount write error handed out clusters that on disk still looked free.
 */
int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
{
    int64_t offset;
    int ret;

    offset = alloc_clusters_noref(bs, size);
    ret = update_refcount(bs, offset, size, 1);
    if (ret < 0) {
        return ret;
    }
    return offset;
}
345

  
346
/* only used to allocate compressed sectors. We try to allocate
   contiguous sectors. size must be <= cluster_size */
/*
 * Sub-cluster allocator: returns the byte offset of `size` bytes.
 *
 * State: s->free_byte_offset points at the next free byte inside a
 * partially-used cluster (0 means "no current cluster"). A cluster shared
 * by several allocations gets one extra refcount per additional allocation.
 */
int64_t alloc_bytes(BlockDriverState *bs, int size)
{
    BDRVQcowState *s = bs->opaque;
    int64_t offset, cluster_offset;
    int free_in_cluster;

    assert(size > 0 && size <= s->cluster_size);
    if (s->free_byte_offset == 0) {
        /* no partially-used cluster yet: start a fresh one */
        s->free_byte_offset = alloc_clusters(bs, s->cluster_size);
    }
 redo:
    free_in_cluster = s->cluster_size -
        (s->free_byte_offset & (s->cluster_size - 1));
    if (size <= free_in_cluster) {
        /* enough space in current cluster */
        offset = s->free_byte_offset;
        s->free_byte_offset += size;
        free_in_cluster -= size;
        if (free_in_cluster == 0)
            s->free_byte_offset = 0;
        /* mid-cluster allocation shares the cluster: bump its refcount
         * (an allocation at offset 0 of the cluster already owns the
         * refcount alloc_clusters gave it) */
        if ((offset & (s->cluster_size - 1)) != 0)
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
    } else {
        offset = alloc_clusters(bs, s->cluster_size);
        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
        if ((cluster_offset + s->cluster_size) == offset) {
            /* we are lucky: contiguous data */
            offset = s->free_byte_offset;
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
            s->free_byte_offset += size;
        } else {
            /* not contiguous: abandon the tail of the old cluster and
             * retry inside the freshly allocated one */
            s->free_byte_offset = offset;
            goto redo;
        }
    }
    return offset;
}
385

  
386
/*
 * Drop one reference from every cluster overlapping [offset, offset + size).
 *
 * NOTE(review): the update_refcount() return value is ignored, so a write
 * error while freeing is silently dropped — consider at least logging it.
 */
void free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size)
{
    update_refcount(bs, offset, size, -1);
}
391

  
392

  
393

  
394
/*********************************************************/
395
/* snapshots and image creation */
396

  
397

  
398

  
399
/*
 * Image-creation helper: increment, in the in-memory creation-state
 * refcount block, the refcount of every cluster overlapping
 * [offset, offset + size). Entries are kept big-endian in place.
 */
void create_refcount_update(QCowCreateState *s, int64_t offset, int64_t size)
{
    int64_t first, final, pos;
    uint16_t *entry;
    int count;

    first = offset & ~(s->cluster_size - 1);
    final = (offset + size - 1)  & ~(s->cluster_size - 1);
    for (pos = first; pos <= final; pos += s->cluster_size) {
        entry = &s->refcount_block[pos >> s->cluster_bits];
        count = be16_to_cpu(*entry) + 1;
        *entry = cpu_to_be16(count);
    }
}
415

  
416
/* update the refcounts of snapshots and the copied flag */
/*
 * Walk the given L1 table (and every L2 table and data cluster below it),
 * applying addend (+1/-1, or 0 for a pure recheck) to each refcount, and
 * recompute the QCOW_OFLAG_COPIED bit: it must be set exactly when the
 * refcount is 1. Modified L2 tables and the L1 table are written back.
 *
 * Returns 0 on success, -EIO on any read/write failure.
 */
int update_snapshot_refcount(BlockDriverState *bs,
                             int64_t l1_table_offset,
                             int l1_size,
                             int addend)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
    int64_t old_offset, old_l2_offset;
    int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;

    /* refcounts of L2 tables may change below; drop any cached copies */
    l2_cache_reset(bs);

    l2_table = NULL;
    l1_table = NULL;
    l1_size2 = l1_size * sizeof(uint64_t);
    l1_allocated = 0;
    if (l1_table_offset != s->l1_table_offset) {
        /* a snapshot's L1 table: read it from disk */
        l1_table = qemu_malloc(l1_size2);
        l1_allocated = 1;
        if (bdrv_pread(s->hd, l1_table_offset,
                       l1_table, l1_size2) != l1_size2)
            goto fail;
        for(i = 0;i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
    } else {
        /* the active L1 table: work on the in-memory copy */
        assert(l1_size == s->l1_size);
        l1_table = s->l1_table;
        l1_allocated = 0;
    }

    l2_size = s->l2_size * sizeof(uint64_t);
    l2_table = qemu_malloc(l2_size);
    l1_modified = 0;
    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            old_l2_offset = l2_offset;
            l2_offset &= ~QCOW_OFLAG_COPIED;
            l2_modified = 0;
            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
                goto fail;
            for(j = 0; j < s->l2_size; j++) {
                offset = be64_to_cpu(l2_table[j]);
                if (offset != 0) {
                    old_offset = offset;
                    offset &= ~QCOW_OFLAG_COPIED;
                    if (offset & QCOW_OFLAG_COMPRESSED) {
                        /* compressed clusters are tracked at 512-byte
                         * sector granularity */
                        nb_csectors = ((offset >> s->csize_shift) &
                                       s->csize_mask) + 1;
                        if (addend != 0)
                            update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
                                            nb_csectors * 512, addend);
                        /* compressed clusters are never modified */
                        refcount = 2;
                    } else {
                        if (addend != 0) {
                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
                        } else {
                            refcount = get_refcount(bs, offset >> s->cluster_bits);
                        }
                    }

                    /* COPIED iff exactly one reference */
                    if (refcount == 1) {
                        offset |= QCOW_OFLAG_COPIED;
                    }
                    if (offset != old_offset) {
                        l2_table[j] = cpu_to_be64(offset);
                        l2_modified = 1;
                    }
                }
            }
            if (l2_modified) {
                if (bdrv_pwrite(s->hd,
                                l2_offset, l2_table, l2_size) != l2_size)
                    goto fail;
            }

            /* same accounting for the L2 table's own cluster */
            if (addend != 0) {
                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
            } else {
                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
            }
            if (refcount == 1) {
                l2_offset |= QCOW_OFLAG_COPIED;
            }
            if (l2_offset != old_l2_offset) {
                l1_table[i] = l2_offset;
                l1_modified = 1;
            }
        }
    }
    if (l1_modified) {
        /* byteswap to disk order just for the write, then back */
        for(i = 0; i < l1_size; i++)
            cpu_to_be64s(&l1_table[i]);
        if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
                        l1_size2) != l1_size2)
            goto fail;
        for(i = 0; i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
    }
    if (l1_allocated)
        qemu_free(l1_table);
    qemu_free(l2_table);
    return 0;
 fail:
    if (l1_allocated)
        qemu_free(l1_table);
    qemu_free(l2_table);
    return -EIO;
}
527

  
528

  
529

  
530

  
531
/*********************************************************/
532
/* refcount checking functions */
533

  
534

  
535

  
536
/*
537
 * Increases the refcount for a range of clusters in a given refcount table.
538
 * This is used to construct a temporary refcount table out of L1 and L2 tables
539
 * which can be compared the the refcount table saved in the image.
540
 *
541
 * Returns the number of errors in the image that were found
542
 */
543
static int inc_refcounts(BlockDriverState *bs,
544
                          uint16_t *refcount_table,
545
                          int refcount_table_size,
546
                          int64_t offset, int64_t size)
547
{
548
    BDRVQcowState *s = bs->opaque;
549
    int64_t start, last, cluster_offset;
550
    int k;
551
    int errors = 0;
552

  
553
    if (size <= 0)
554
        return 0;
555

  
556
    start = offset & ~(s->cluster_size - 1);
557
    last = (offset + size - 1) & ~(s->cluster_size - 1);
558
    for(cluster_offset = start; cluster_offset <= last;
559
        cluster_offset += s->cluster_size) {
560
        k = cluster_offset >> s->cluster_bits;
561
        if (k < 0 || k >= refcount_table_size) {
562
            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
563
                cluster_offset);
564
            errors++;
565
        } else {
566
            if (++refcount_table[k] == 0) {
567
                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
568
                    "\n", cluster_offset);
569
                errors++;
570
            }
571
        }
572
    }
573

  
574
    return errors;
575
}
576

  
577
/*
578
 * Increases the refcount in the given refcount table for the all clusters
579
 * referenced in the L2 table. While doing so, performs some checks on L2
580
 * entries.
581
 *
582
 * Returns the number of errors found by the checks or -errno if an internal
583
 * error occurred.
584
 */
585
static int check_refcounts_l2(BlockDriverState *bs,
586
    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
587
    int check_copied)
588
{
589
    BDRVQcowState *s = bs->opaque;
590
    uint64_t *l2_table, offset;
591
    int i, l2_size, nb_csectors, refcount;
592
    int errors = 0;
593

  
594
    /* Read L2 table from disk */
595
    l2_size = s->l2_size * sizeof(uint64_t);
596
    l2_table = qemu_malloc(l2_size);
597

  
598
    if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
599
        goto fail;
600

  
601
    /* Do the actual checks */
602
    for(i = 0; i < s->l2_size; i++) {
603
        offset = be64_to_cpu(l2_table[i]);
604
        if (offset != 0) {
605
            if (offset & QCOW_OFLAG_COMPRESSED) {
606
                /* Compressed clusters don't have QCOW_OFLAG_COPIED */
607
                if (offset & QCOW_OFLAG_COPIED) {
608
                    fprintf(stderr, "ERROR: cluster %" PRId64 ": "
609
                        "copied flag must never be set for compressed "
610
                        "clusters\n", offset >> s->cluster_bits);
611
                    offset &= ~QCOW_OFLAG_COPIED;
612
                    errors++;
613
                }
614

  
615
                /* Mark cluster as used */
616
                nb_csectors = ((offset >> s->csize_shift) &
617
                               s->csize_mask) + 1;
618
                offset &= s->cluster_offset_mask;
619
                errors += inc_refcounts(bs, refcount_table,
620
                              refcount_table_size,
621
                              offset & ~511, nb_csectors * 512);
622
            } else {
623
                /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
624
                if (check_copied) {
625
                    uint64_t entry = offset;
626
                    offset &= ~QCOW_OFLAG_COPIED;
627
                    refcount = get_refcount(bs, offset >> s->cluster_bits);
628
                    if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
629
                        fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
630
                            PRIx64 " refcount=%d\n", entry, refcount);
631
                        errors++;
632
                    }
633
                }
634

  
635
                /* Mark cluster as used */
636
                offset &= ~QCOW_OFLAG_COPIED;
637
                errors += inc_refcounts(bs, refcount_table,
638
                              refcount_table_size,
639
                              offset, s->cluster_size);
640

  
641
                /* Correct offsets are cluster aligned */
642
                if (offset & (s->cluster_size - 1)) {
643
                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
644
                        "properly aligned; L2 entry corrupted.\n", offset);
645
                    errors++;
646
                }
647
            }
648
        }
649
    }
650

  
651
    qemu_free(l2_table);
652
    return errors;
653

  
654
fail:
655
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
656
    qemu_free(l2_table);
657
    return -EIO;
658
}
659

  
660
/*
 * Increases the refcount for the L1 table, its L2 tables and all referenced
 * clusters in the given refcount table. While doing so, performs some checks
 * on L1 and L2 entries.
 *
 * Returns the number of errors found by the checks or -errno if an internal
 * error occurred.
 */
static int check_refcounts_l1(BlockDriverState *bs,
                              uint16_t *refcount_table,
                              int refcount_table_size,
                              int64_t l1_table_offset, int l1_size,
                              int check_copied)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t *l1_table, l2_offset, l1_size2;
    int i, refcount, ret;
    int errors = 0;

    l1_size2 = l1_size * sizeof(uint64_t);

    /* Mark L1 table as used */
    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
                  l1_table_offset, l1_size2);

    /* Read L1 table entries from disk */
    l1_table = qemu_malloc(l1_size2);
    if (bdrv_pread(s->hd, l1_table_offset,
                   l1_table, l1_size2) != l1_size2)
        goto fail;
    /* on-disk entries are big-endian */
    for(i = 0;i < l1_size; i++)
        be64_to_cpus(&l1_table[i]);

    /* Do the actual checks */
    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
            if (check_copied) {
                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
                    >> s->cluster_bits);
                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
                        " refcount=%d\n", l2_offset, refcount);
                    errors++;
                }
            }

            /* Mark L2 table as used */
            l2_offset &= ~QCOW_OFLAG_COPIED;
            errors += inc_refcounts(bs, refcount_table,
                          refcount_table_size,
                          l2_offset,
                          s->cluster_size);

            /* L2 tables are cluster aligned */
            if (l2_offset & (s->cluster_size - 1)) {
                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
                    "cluster aligned; L1 entry corrupted\n", l2_offset);
                errors++;
            }

            /* Process and check L2 entries */
            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
                l2_offset, check_copied);
            if (ret < 0) {
                goto fail;
            }
            errors += ret;
        }
    }
    qemu_free(l1_table);
    return errors;

fail:
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
    qemu_free(l1_table);
    return -EIO;
}
739

  
740
/*
741
 * Checks an image for refcount consistency.
742
 *
743
 * Returns 0 if no errors are found, the number of errors in case the image is
744
 * detected as corrupted, and -errno when an internal error occured.
745
 */
746
int check_refcounts(BlockDriverState *bs)
747
{
748
    BDRVQcowState *s = bs->opaque;
749
    int64_t size;
750
    int nb_clusters, refcount1, refcount2, i;
751
    QCowSnapshot *sn;
752
    uint16_t *refcount_table;
753
    int ret, errors = 0;
754

  
755
    size = bdrv_getlength(s->hd);
756
    nb_clusters = size_to_clusters(s, size);
757
    refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
758

  
759
    /* header */
760
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
761
                  0, s->cluster_size);
762

  
763
    /* current L1 table */
764
    ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
765
                       s->l1_table_offset, s->l1_size, 1);
766
    if (ret < 0) {
767
        return ret;
768
    }
769
    errors += ret;
770

  
771
    /* snapshots */
772
    for(i = 0; i < s->nb_snapshots; i++) {
773
        sn = s->snapshots + i;
774
        check_refcounts_l1(bs, refcount_table, nb_clusters,
775
                           sn->l1_table_offset, sn->l1_size, 0);
776
    }
777
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
778
                  s->snapshots_offset, s->snapshots_size);
779

  
780
    /* refcount data */
781
    errors += inc_refcounts(bs, refcount_table, nb_clusters,
782
                  s->refcount_table_offset,
783
                  s->refcount_table_size * sizeof(uint64_t));
784
    for(i = 0; i < s->refcount_table_size; i++) {
785
        int64_t offset;
786
        offset = s->refcount_table[i];
787
        if (offset != 0) {
788
            errors += inc_refcounts(bs, refcount_table, nb_clusters,
789
                          offset, s->cluster_size);
790
        }
791
    }
792

  
793
    /* compare ref counts */
794
    for(i = 0; i < nb_clusters; i++) {
795
        refcount1 = get_refcount(bs, i);
796
        refcount2 = refcount_table[i];
797
        if (refcount1 != refcount2) {
798
            fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
799
                   i, refcount1, refcount2);
800
            errors++;
801
        }
802
    }
803

  
804
    qemu_free(refcount_table);
805

  
806
    return errors;
807
}
808

  
b/block/qcow2.c
26 26
#include "module.h"
27 27
#include <zlib.h>
28 28
#include "aes.h"
29
#include "block/qcow2.h"
29 30

  
30 31
/*
31 32
  Differences with QCOW:
......
47 48
//#define DEBUG_ALLOC2
48 49
//#define DEBUG_EXT
49 50

  
50
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
51
#define QCOW_VERSION 2
52

  
53
#define QCOW_CRYPT_NONE 0
54
#define QCOW_CRYPT_AES  1
55

  
56
#define QCOW_MAX_CRYPT_CLUSTERS 32
57

  
58
/* indicate that the refcount of the referenced cluster is exactly one. */
59
#define QCOW_OFLAG_COPIED     (1LL << 63)
60
/* indicate that the cluster is compressed (they never have the copied flag) */
61
#define QCOW_OFLAG_COMPRESSED (1LL << 62)
62

  
63
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
64

  
65
#define MIN_CLUSTER_BITS 9
66
#define MAX_CLUSTER_BITS 16
67

  
68
typedef struct QCowHeader {
69
    uint32_t magic;
70
    uint32_t version;
71
    uint64_t backing_file_offset;
72
    uint32_t backing_file_size;
73
    uint32_t cluster_bits;
74
    uint64_t size; /* in bytes */
75
    uint32_t crypt_method;
76
    uint32_t l1_size; /* XXX: save number of clusters instead ? */
77
    uint64_t l1_table_offset;
78
    uint64_t refcount_table_offset;
79
    uint32_t refcount_table_clusters;
80
    uint32_t nb_snapshots;
81
    uint64_t snapshots_offset;
82
} QCowHeader;
83

  
84 51

  
85 52
typedef struct {
86 53
    uint32_t magic;
......
110 77
    /* name follows  */
111 78
} QCowSnapshotHeader;
112 79

  
113
#define L2_CACHE_SIZE 16
114

  
115
typedef struct QCowSnapshot {
116
    uint64_t l1_table_offset;
117
    uint32_t l1_size;
118
    char *id_str;
119
    char *name;
120
    uint32_t vm_state_size;
121
    uint32_t date_sec;
122
    uint32_t date_nsec;
123
    uint64_t vm_clock_nsec;
124
} QCowSnapshot;
125

  
126
typedef struct BDRVQcowState {
127
    BlockDriverState *hd;
128
    int cluster_bits;
129
    int cluster_size;
130
    int cluster_sectors;
131
    int l2_bits;
132
    int l2_size;
133
    int l1_size;
134
    int l1_vm_state_index;
135
    int csize_shift;
136
    int csize_mask;
137
    uint64_t cluster_offset_mask;
138
    uint64_t l1_table_offset;
139
    uint64_t *l1_table;
140
    uint64_t *l2_cache;
141
    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
142
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
143
    uint8_t *cluster_cache;
144
    uint8_t *cluster_data;
145
    uint64_t cluster_cache_offset;
146

  
147
    uint64_t *refcount_table;
148
    uint64_t refcount_table_offset;
149
    uint32_t refcount_table_size;
150
    uint64_t refcount_block_cache_offset;
151
    uint16_t *refcount_block_cache;
152
    int64_t free_cluster_index;
153
    int64_t free_byte_offset;
154

  
155
    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
156
    uint32_t crypt_method_header;
157
    AES_KEY aes_encrypt_key;
158
    AES_KEY aes_decrypt_key;
159
    uint64_t snapshots_offset;
160
    int snapshots_size;
161
    int nb_snapshots;
162
    QCowSnapshot *snapshots;
163
} BDRVQcowState;
164 80

  
165 81
static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
166 82
static int qcow_read(BlockDriverState *bs, int64_t sector_num,
167 83
                     uint8_t *buf, int nb_sectors);
168 84
static int qcow_read_snapshots(BlockDriverState *bs);
169 85
static void qcow_free_snapshots(BlockDriverState *bs);
170
static int refcount_init(BlockDriverState *bs);
171
static void refcount_close(BlockDriverState *bs);
172
static int get_refcount(BlockDriverState *bs, int64_t cluster_index);
173
static int update_cluster_refcount(BlockDriverState *bs,
174
                                   int64_t cluster_index,
175
                                   int addend);
176
static int update_refcount(BlockDriverState *bs,
177
                            int64_t offset, int64_t length,
178
                            int addend);
179
static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
180
static int64_t alloc_bytes(BlockDriverState *bs, int size);
181
static void free_clusters(BlockDriverState *bs,
182
                          int64_t offset, int64_t size);
183
static int check_refcounts(BlockDriverState *bs);
184 86

  
185 87
static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
186 88
{
......
482 384
    return 0;
483 385
}
484 386

  
485
static void l2_cache_reset(BlockDriverState *bs)
387
void l2_cache_reset(BlockDriverState *bs)
486 388
{
487 389
    BDRVQcowState *s = bs->opaque;
488 390

  
......
691 593
    return l2_table;
692 594
}
693 595

  
694
static int size_to_clusters(BDRVQcowState *s, int64_t size)
695
{
696
    return (size + (s->cluster_size - 1)) >> s->cluster_bits;
697
}
698

  
699 596
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
700 597
        uint64_t *l2_table, uint64_t start, uint64_t mask)
701 598
{
......
1522 1419
    bdrv_delete(s->hd);
1523 1420
}
1524 1421

  
1525
/* XXX: use std qcow open function ? */
1526
typedef struct QCowCreateState {
1527
    int cluster_size;
1528
    int cluster_bits;
1529
    uint16_t *refcount_block;
1530
    uint64_t *refcount_table;
1531
    int64_t l1_table_offset;
1532
    int64_t refcount_table_offset;
1533
    int64_t refcount_block_offset;
1534
} QCowCreateState;
1535

  
1536
static void create_refcount_update(QCowCreateState *s,
1537
                                   int64_t offset, int64_t size)
1538
{
1539
    int refcount;
1540
    int64_t start, last, cluster_offset;
1541
    uint16_t *p;
1542

  
1543
    start = offset & ~(s->cluster_size - 1);
1544
    last = (offset + size - 1)  & ~(s->cluster_size - 1);
1545
    for(cluster_offset = start; cluster_offset <= last;
1546
        cluster_offset += s->cluster_size) {
1547
        p = &s->refcount_block[cluster_offset >> s->cluster_bits];
1548
        refcount = be16_to_cpu(*p);
1549
        refcount++;
1550
        *p = cpu_to_be16(refcount);
1551
    }
1552
}
1553

  
1554 1422
static int get_bits_from_size(size_t size)
1555 1423
{
1556 1424
    int res = 0;
......
1834 1702
/*********************************************************/
1835 1703
/* snapshot support */
1836 1704

  
1837
/* update the refcounts of snapshots and the copied flag */
1838
static int update_snapshot_refcount(BlockDriverState *bs,
1839
                                    int64_t l1_table_offset,
1840
                                    int l1_size,
1841
                                    int addend)
1842
{
1843
    BDRVQcowState *s = bs->opaque;
1844
    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
1845
    int64_t old_offset, old_l2_offset;
1846
    int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
1847

  
1848
    l2_cache_reset(bs);
1849

  
1850
    l2_table = NULL;
1851
    l1_table = NULL;
1852
    l1_size2 = l1_size * sizeof(uint64_t);
1853
    l1_allocated = 0;
1854
    if (l1_table_offset != s->l1_table_offset) {
1855
        l1_table = qemu_malloc(l1_size2);
1856
        l1_allocated = 1;
1857
        if (bdrv_pread(s->hd, l1_table_offset,
1858
                       l1_table, l1_size2) != l1_size2)
1859
            goto fail;
1860
        for(i = 0;i < l1_size; i++)
1861
            be64_to_cpus(&l1_table[i]);
1862
    } else {
1863
        assert(l1_size == s->l1_size);
1864
        l1_table = s->l1_table;
1865
        l1_allocated = 0;
1866
    }
1867

  
1868
    l2_size = s->l2_size * sizeof(uint64_t);
1869
    l2_table = qemu_malloc(l2_size);
1870
    l1_modified = 0;
1871
    for(i = 0; i < l1_size; i++) {
1872
        l2_offset = l1_table[i];
1873
        if (l2_offset) {
1874
            old_l2_offset = l2_offset;
1875
            l2_offset &= ~QCOW_OFLAG_COPIED;
1876
            l2_modified = 0;
1877
            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
1878
                goto fail;
1879
            for(j = 0; j < s->l2_size; j++) {
1880
                offset = be64_to_cpu(l2_table[j]);
1881
                if (offset != 0) {
1882
                    old_offset = offset;
1883
                    offset &= ~QCOW_OFLAG_COPIED;
1884
                    if (offset & QCOW_OFLAG_COMPRESSED) {
1885
                        nb_csectors = ((offset >> s->csize_shift) &
1886
                                       s->csize_mask) + 1;
1887
                        if (addend != 0)
1888
                            update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
1889
                                            nb_csectors * 512, addend);
1890
                        /* compressed clusters are never modified */
1891
                        refcount = 2;
1892
                    } else {
1893
                        if (addend != 0) {
1894
                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
1895
                        } else {
1896
                            refcount = get_refcount(bs, offset >> s->cluster_bits);
1897
                        }
1898
                    }
1899

  
1900
                    if (refcount == 1) {
1901
                        offset |= QCOW_OFLAG_COPIED;
1902
                    }
1903
                    if (offset != old_offset) {
1904
                        l2_table[j] = cpu_to_be64(offset);
1905
                        l2_modified = 1;
1906
                    }
1907
                }
1908
            }
1909
            if (l2_modified) {
1910
                if (bdrv_pwrite(s->hd,
1911
                                l2_offset, l2_table, l2_size) != l2_size)
1912
                    goto fail;
1913
            }
1914

  
1915
            if (addend != 0) {
1916
                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
1917
            } else {
1918
                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
1919
            }
1920
            if (refcount == 1) {
1921
                l2_offset |= QCOW_OFLAG_COPIED;
1922
            }
1923
            if (l2_offset != old_l2_offset) {
1924
                l1_table[i] = l2_offset;
1925
                l1_modified = 1;
1926
            }
1927
        }
1928
    }
1929
    if (l1_modified) {
1930
        for(i = 0; i < l1_size; i++)
1931
            cpu_to_be64s(&l1_table[i]);
1932
        if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
1933
                        l1_size2) != l1_size2)
1934
            goto fail;
1935
        for(i = 0; i < l1_size; i++)
1936
            be64_to_cpus(&l1_table[i]);
1937
    }
1938
    if (l1_allocated)
1939
        qemu_free(l1_table);
1940
    qemu_free(l2_table);
1941
    return 0;
1942
 fail:
1943
    if (l1_allocated)
1944
        qemu_free(l1_table);
1945
    qemu_free(l2_table);
1946
    return -EIO;
1947
}
1948 1705

  
1949 1706
static void qcow_free_snapshots(BlockDriverState *bs)
1950 1707
{
......
2306 2063
    return s->nb_snapshots;
2307 2064
}
2308 2065

  
2309
/*********************************************************/
2310
/* refcount handling */
2311

  
2312
static int refcount_init(BlockDriverState *bs)
2313
{
2314
    BDRVQcowState *s = bs->opaque;
2315
    int ret, refcount_table_size2, i;
2316

  
2317
    s->refcount_block_cache = qemu_malloc(s->cluster_size);
2318
    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
2319
    s->refcount_table = qemu_malloc(refcount_table_size2);
2320
    if (s->refcount_table_size > 0) {
2321
        ret = bdrv_pread(s->hd, s->refcount_table_offset,
2322
                         s->refcount_table, refcount_table_size2);
2323
        if (ret != refcount_table_size2)
2324
            goto fail;
2325
        for(i = 0; i < s->refcount_table_size; i++)
2326
            be64_to_cpus(&s->refcount_table[i]);
2327
    }
2328
    return 0;
2329
 fail:
2330
    return -ENOMEM;
2331
}
2332

  
2333
static void refcount_close(BlockDriverState *bs)
2334
{
2335
    BDRVQcowState *s = bs->opaque;
2336
    qemu_free(s->refcount_block_cache);
2337
    qemu_free(s->refcount_table);
2338
}
2339

  
2340

  
2341
static int load_refcount_block(BlockDriverState *bs,
2342
                               int64_t refcount_block_offset)
2343
{
2344
    BDRVQcowState *s = bs->opaque;
2345
    int ret;
2346
    ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
2347
                     s->cluster_size);
2348
    if (ret != s->cluster_size)
2349
        return -EIO;
2350
    s->refcount_block_cache_offset = refcount_block_offset;
2351
    return 0;
2352
}
2353

  
2354
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
2355
{
2356
    BDRVQcowState *s = bs->opaque;
2357
    int refcount_table_index, block_index;
2358
    int64_t refcount_block_offset;
2359

  
2360
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
2361
    if (refcount_table_index >= s->refcount_table_size)
2362
        return 0;
2363
    refcount_block_offset = s->refcount_table[refcount_table_index];
2364
    if (!refcount_block_offset)
2365
        return 0;
2366
    if (refcount_block_offset != s->refcount_block_cache_offset) {
2367
        /* better than nothing: return allocated if read error */
2368
        if (load_refcount_block(bs, refcount_block_offset) < 0)
2369
            return 1;
2370
    }
2371
    block_index = cluster_index &
2372
        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
2373
    return be16_to_cpu(s->refcount_block_cache[block_index]);
2374
}
2375

  
2376
/* return < 0 if error */
2377
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
2378
{
2379
    BDRVQcowState *s = bs->opaque;
2380
    int i, nb_clusters;
2381

  
2382
    nb_clusters = size_to_clusters(s, size);
2383
retry:
2384
    for(i = 0; i < nb_clusters; i++) {
2385
        int64_t i = s->free_cluster_index++;
2386
        if (get_refcount(bs, i) != 0)
2387
            goto retry;
2388
    }
2389
#ifdef DEBUG_ALLOC2
2390
    printf("alloc_clusters: size=%lld -> %lld\n",
2391
            size,
2392
            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
2393
#endif
2394
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
2395
}
2396

  
2397
static int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
2398
{
2399
    int64_t offset;
2400

  
2401
    offset = alloc_clusters_noref(bs, size);
2402
    update_refcount(bs, offset, size, 1);
2403
    return offset;
2404
}
2405

  
2406
/* only used to allocate compressed sectors. We try to allocate
2407
   contiguous sectors. size must be <= cluster_size */
2408
static int64_t alloc_bytes(BlockDriverState *bs, int size)
2409
{
2410
    BDRVQcowState *s = bs->opaque;
2411
    int64_t offset, cluster_offset;
2412
    int free_in_cluster;
2413

  
2414
    assert(size > 0 && size <= s->cluster_size);
2415
    if (s->free_byte_offset == 0) {
2416
        s->free_byte_offset = alloc_clusters(bs, s->cluster_size);
2417
    }
2418
 redo:
2419
    free_in_cluster = s->cluster_size -
2420
        (s->free_byte_offset & (s->cluster_size - 1));
2421
    if (size <= free_in_cluster) {
2422
        /* enough space in current cluster */
2423
        offset = s->free_byte_offset;
2424
        s->free_byte_offset += size;
2425
        free_in_cluster -= size;
2426
        if (free_in_cluster == 0)
2427
            s->free_byte_offset = 0;
2428
        if ((offset & (s->cluster_size - 1)) != 0)
2429
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
2430
    } else {
2431
        offset = alloc_clusters(bs, s->cluster_size);
2432
        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
2433
        if ((cluster_offset + s->cluster_size) == offset) {
2434
            /* we are lucky: contiguous data */
2435
            offset = s->free_byte_offset;
2436
            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
2437
            s->free_byte_offset += size;
2438
        } else {
2439
            s->free_byte_offset = offset;
2440
            goto redo;
2441
        }
2442
    }
2443
    return offset;
2444
}
2445

  
2446
static void free_clusters(BlockDriverState *bs,
2447
                          int64_t offset, int64_t size)
2448
{
2449
    update_refcount(bs, offset, size, -1);
2450
}
2451

  
2452
static int grow_refcount_table(BlockDriverState *bs, int min_size)
2453
{
2454
    BDRVQcowState *s = bs->opaque;
2455
    int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
2456
    uint64_t *new_table;
2457
    int64_t table_offset;
2458
    uint8_t data[12];
2459
    int old_table_size;
2460
    int64_t old_table_offset;
2461

  
2462
    if (min_size <= s->refcount_table_size)
2463
        return 0;
2464
    /* compute new table size */
2465
    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
2466
    for(;;) {
2467
        if (refcount_table_clusters == 0) {
2468
            refcount_table_clusters = 1;
2469
        } else {
2470
            refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
2471
        }
2472
        new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
2473
        if (min_size <= new_table_size)
2474
            break;
2475
    }
2476
#ifdef DEBUG_ALLOC2
2477
    printf("grow_refcount_table from %d to %d\n",
2478
           s->refcount_table_size,
2479
           new_table_size);
2480
#endif
2481
    new_table_size2 = new_table_size * sizeof(uint64_t);
2482
    new_table = qemu_mallocz(new_table_size2);
2483
    memcpy(new_table, s->refcount_table,
2484
           s->refcount_table_size * sizeof(uint64_t));
2485
    for(i = 0; i < s->refcount_table_size; i++)
2486
        cpu_to_be64s(&new_table[i]);
2487
    /* Note: we cannot update the refcount now to avoid recursion */
2488
    table_offset = alloc_clusters_noref(bs, new_table_size2);
2489
    ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
2490
    if (ret != new_table_size2)
2491
        goto fail;
2492
    for(i = 0; i < s->refcount_table_size; i++)
2493
        be64_to_cpus(&new_table[i]);
2494

  
2495
    cpu_to_be64w((uint64_t*)data, table_offset);
2496
    cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
2497
    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
2498
                    data, sizeof(data)) != sizeof(data))
2499
        goto fail;
2500
    qemu_free(s->refcount_table);
2501
    old_table_offset = s->refcount_table_offset;
2502
    old_table_size = s->refcount_table_size;
2503
    s->refcount_table = new_table;
2504
    s->refcount_table_size = new_table_size;
2505
    s->refcount_table_offset = table_offset;
2506

  
2507
    update_refcount(bs, table_offset, new_table_size2, 1);
2508
    free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
2509
    return 0;
2510
 fail:
2511
    free_clusters(bs, table_offset, new_table_size2);
2512
    qemu_free(new_table);
2513
    return -EIO;
2514
}
2515

  
2516

  
2517
static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
2518
{
2519
    BDRVQcowState *s = bs->opaque;
2520
    int64_t offset, refcount_block_offset;
2521
    int ret, refcount_table_index;
2522
    uint64_t data64;
2523

  
2524
    /* Find L1 index and grow refcount table if needed */
2525
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
2526
    if (refcount_table_index >= s->refcount_table_size) {
2527
        ret = grow_refcount_table(bs, refcount_table_index + 1);
2528
        if (ret < 0)
2529
            return ret;
2530
    }
2531

  
2532
    /* Load or allocate the refcount block */
2533
    refcount_block_offset = s->refcount_table[refcount_table_index];
2534
    if (!refcount_block_offset) {
2535
        /* create a new refcount block */
2536
        /* Note: we cannot update the refcount now to avoid recursion */
2537
        offset = alloc_clusters_noref(bs, s->cluster_size);
2538
        memset(s->refcount_block_cache, 0, s->cluster_size);
2539
        ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
2540
        if (ret != s->cluster_size)
2541
            return -EINVAL;
2542
        s->refcount_table[refcount_table_index] = offset;
2543
        data64 = cpu_to_be64(offset);
2544
        ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
2545
                          refcount_table_index * sizeof(uint64_t),
2546
                          &data64, sizeof(data64));
2547
        if (ret != sizeof(data64))
2548
            return -EINVAL;
2549

  
2550
        refcount_block_offset = offset;
2551
        s->refcount_block_cache_offset = offset;
2552
        update_refcount(bs, offset, s->cluster_size, 1);
2553
    } else {
2554
        if (refcount_block_offset != s->refcount_block_cache_offset) {
2555
            if (load_refcount_block(bs, refcount_block_offset) < 0)
2556
                return -EIO;
2557
        }
2558
    }
2559

  
2560
    return refcount_block_offset;
2561
}
2562

  
2563
/* addend must be 1 or -1 */
2564
static int update_cluster_refcount(BlockDriverState *bs,
2565
                                   int64_t cluster_index,
2566
                                   int addend)
2567
{
2568
    BDRVQcowState *s = bs->opaque;
2569
    int ret;
2570

  
2571
    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend);
2572
    if (ret < 0) {
2573
        return ret;
2574
    }
2575

  
2576
    return get_refcount(bs, cluster_index);
2577
}
2578

  
2579
/* XXX: cache several refcount block clusters ? */
2580
static int update_refcount(BlockDriverState *bs,
2581
                            int64_t offset, int64_t length,
2582
                            int addend)
2583
{
2584
    BDRVQcowState *s = bs->opaque;
2585
    int64_t start, last, cluster_offset;
2586
    int64_t refcount_block_offset = 0;
2587
    int64_t table_index = -1, old_table_index;
2588
    int first_index = -1, last_index = -1;
2589

  
2590
#ifdef DEBUG_ALLOC2
2591
    printf("update_refcount: offset=%lld size=%lld addend=%d\n",
2592
           offset, length, addend);
2593
#endif
2594
    if (length <= 0)
2595
        return -EINVAL;
2596
    start = offset & ~(s->cluster_size - 1);
2597
    last = (offset + length - 1) & ~(s->cluster_size - 1);
2598
    for(cluster_offset = start; cluster_offset <= last;
2599
        cluster_offset += s->cluster_size)
2600
    {
2601
        int block_index, refcount;
2602
        int64_t cluster_index = cluster_offset >> s->cluster_bits;
2603

  
2604
        /* Only write refcount block to disk when we are done with it */
2605
        old_table_index = table_index;
2606
        table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
2607
        if ((old_table_index >= 0) && (table_index != old_table_index)) {
2608
            size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
2609
            if (bdrv_pwrite(s->hd,
2610
                refcount_block_offset + (first_index << REFCOUNT_SHIFT),
2611
                &s->refcount_block_cache[first_index], size) != size)
2612
            {
2613
                return -EIO;
2614
            }
2615

  
2616
            first_index = -1;
2617
            last_index = -1;
2618
        }
2619

  
2620
        /* Load the refcount block and allocate it if needed */
2621
        refcount_block_offset = alloc_refcount_block(bs, cluster_index);
2622
        if (refcount_block_offset < 0) {
2623
            return refcount_block_offset;
2624
        }
2625

  
2626
        /* we can update the count and save it */
2627
        block_index = cluster_index &
2628
            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
2629
        if (first_index == -1 || block_index < first_index) {
2630
            first_index = block_index;
2631
        }
2632
        if (block_index > last_index) {
2633
            last_index = block_index;
2634
        }
2635

  
2636
        refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
2637
        refcount += addend;
2638
        if (refcount < 0 || refcount > 0xffff)
2639
            return -EINVAL;
2640
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
2641
            s->free_cluster_index = cluster_index;
2642
        }
2643
        s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
2644
    }
2645

  
2646
    /* Write last changed block to disk */
2647
    if (refcount_block_offset != 0) {
2648
        size_t size = (last_index - first_index + 1) << REFCOUNT_SHIFT;
2649
        if (bdrv_pwrite(s->hd,
2650
            refcount_block_offset + (first_index << REFCOUNT_SHIFT),
2651
            &s->refcount_block_cache[first_index], size) != size)
2652
        {
2653
            return -EIO;
2654
        }
2655
    }
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff