Statistics
| Branch: | Revision:

root / block / qcow2-refcount.c @ 8c116b0e

History | View | Annotate | Download (58.7 kB)

1 f7d0fe02 Kevin Wolf
/*
2 f7d0fe02 Kevin Wolf
 * Block driver for the QCOW version 2 format
3 f7d0fe02 Kevin Wolf
 *
4 f7d0fe02 Kevin Wolf
 * Copyright (c) 2004-2006 Fabrice Bellard
5 f7d0fe02 Kevin Wolf
 *
6 f7d0fe02 Kevin Wolf
 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 f7d0fe02 Kevin Wolf
 * of this software and associated documentation files (the "Software"), to deal
8 f7d0fe02 Kevin Wolf
 * in the Software without restriction, including without limitation the rights
9 f7d0fe02 Kevin Wolf
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 f7d0fe02 Kevin Wolf
 * copies of the Software, and to permit persons to whom the Software is
11 f7d0fe02 Kevin Wolf
 * furnished to do so, subject to the following conditions:
12 f7d0fe02 Kevin Wolf
 *
13 f7d0fe02 Kevin Wolf
 * The above copyright notice and this permission notice shall be included in
14 f7d0fe02 Kevin Wolf
 * all copies or substantial portions of the Software.
15 f7d0fe02 Kevin Wolf
 *
16 f7d0fe02 Kevin Wolf
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 f7d0fe02 Kevin Wolf
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 f7d0fe02 Kevin Wolf
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 f7d0fe02 Kevin Wolf
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 f7d0fe02 Kevin Wolf
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 f7d0fe02 Kevin Wolf
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 f7d0fe02 Kevin Wolf
 * THE SOFTWARE.
23 f7d0fe02 Kevin Wolf
 */
24 f7d0fe02 Kevin Wolf
25 f7d0fe02 Kevin Wolf
#include "qemu-common.h"
26 737e150e Paolo Bonzini
#include "block/block_int.h"
27 f7d0fe02 Kevin Wolf
#include "block/qcow2.h"
28 a40f1c2a Max Reitz
#include "qemu/range.h"
29 a40f1c2a Max Reitz
#include "qapi/qmp/types.h"
30 f7d0fe02 Kevin Wolf
31 f7d0fe02 Kevin Wolf
/* Forward declarations: alloc_refcount_block() calls both helpers ahead of
 * the point in this file where they are defined. */
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                            int64_t offset, int64_t length,
                            int addend, enum qcow2_discard_type type);
35 f7d0fe02 Kevin Wolf
36 3b88e52b Kevin Wolf
37 f7d0fe02 Kevin Wolf
/*********************************************************/
38 f7d0fe02 Kevin Wolf
/* refcount handling */
39 f7d0fe02 Kevin Wolf
40 ed6ccf0f Kevin Wolf
/*
 * Loads the refcount table of the image into memory.
 *
 * Allocates s->refcount_table with room for s->refcount_table_size 64-bit
 * entries. If the table is not empty, it is read from bs->file at
 * s->refcount_table_offset and every entry is converted from big endian to
 * host byte order.
 *
 * Returns 0 on success. A negative result of bdrv_pread() is passed through
 * unchanged; a short read is reported as -EIO. The buffer is not freed here
 * on failure: it stays in s->refcount_table, which qcow2_refcount_close()
 * frees.
 */
int qcow2_refcount_init(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int ret, refcount_table_size2, i;

    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
    s->refcount_table = g_malloc(refcount_table_size2);
    if (s->refcount_table_size > 0) {
        BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
        ret = bdrv_pread(bs->file, s->refcount_table_offset,
                         s->refcount_table, refcount_table_size2);
        if (ret < 0) {
            /* Report the real I/O error instead of masking it as -ENOMEM */
            return ret;
        }
        if (ret != refcount_table_size2) {
            /* Short read: the table on disk is incomplete */
            return -EIO;
        }
        for (i = 0; i < s->refcount_table_size; i++) {
            be64_to_cpus(&s->refcount_table[i]);
        }
    }
    return 0;
}
60 f7d0fe02 Kevin Wolf
61 ed6ccf0f Kevin Wolf
/* Frees the in-memory refcount table held in the driver state of bs. */
void qcow2_refcount_close(BlockDriverState *bs)
{
    g_free(((BDRVQcowState *) bs->opaque)->refcount_table);
}
66 f7d0fe02 Kevin Wolf
67 f7d0fe02 Kevin Wolf
68 f7d0fe02 Kevin Wolf
/*
 * Requests the refcount block located at refcount_block_offset from the
 * refcount block cache. refcount_block is handed to qcow2_cache_get() as its
 * output parameter.
 *
 * Returns whatever qcow2_cache_get() returns.
 */
static int load_refcount_block(BlockDriverState *bs,
                               int64_t refcount_block_offset,
                               void **refcount_block)
{
    BDRVQcowState *state = bs->opaque;

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);

    return qcow2_cache_get(bs, state->refcount_block_cache,
                           refcount_block_offset, refcount_block);
}
81 f7d0fe02 Kevin Wolf
82 018faafd Kevin Wolf
/*
83 018faafd Kevin Wolf
 * Returns the refcount of the cluster given by its index. Any non-negative
84 018faafd Kevin Wolf
 * return value is the refcount of the cluster, negative values are -errno
85 018faafd Kevin Wolf
 * and indicate an error.
86 018faafd Kevin Wolf
 */
87 f7d0fe02 Kevin Wolf
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
88 f7d0fe02 Kevin Wolf
{
89 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
90 f7d0fe02 Kevin Wolf
    int refcount_table_index, block_index;
91 f7d0fe02 Kevin Wolf
    int64_t refcount_block_offset;
92 018faafd Kevin Wolf
    int ret;
93 29c1a730 Kevin Wolf
    uint16_t *refcount_block;
94 29c1a730 Kevin Wolf
    uint16_t refcount;
95 f7d0fe02 Kevin Wolf
96 f7d0fe02 Kevin Wolf
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
97 f7d0fe02 Kevin Wolf
    if (refcount_table_index >= s->refcount_table_size)
98 f7d0fe02 Kevin Wolf
        return 0;
99 f7d0fe02 Kevin Wolf
    refcount_block_offset = s->refcount_table[refcount_table_index];
100 f7d0fe02 Kevin Wolf
    if (!refcount_block_offset)
101 f7d0fe02 Kevin Wolf
        return 0;
102 29c1a730 Kevin Wolf
103 29c1a730 Kevin Wolf
    ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
104 29c1a730 Kevin Wolf
        (void**) &refcount_block);
105 29c1a730 Kevin Wolf
    if (ret < 0) {
106 29c1a730 Kevin Wolf
        return ret;
107 f7d0fe02 Kevin Wolf
    }
108 29c1a730 Kevin Wolf
109 f7d0fe02 Kevin Wolf
    block_index = cluster_index &
110 f7d0fe02 Kevin Wolf
        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
111 29c1a730 Kevin Wolf
    refcount = be16_to_cpu(refcount_block[block_index]);
112 29c1a730 Kevin Wolf
113 29c1a730 Kevin Wolf
    ret = qcow2_cache_put(bs, s->refcount_block_cache,
114 29c1a730 Kevin Wolf
        (void**) &refcount_block);
115 29c1a730 Kevin Wolf
    if (ret < 0) {
116 29c1a730 Kevin Wolf
        return ret;
117 29c1a730 Kevin Wolf
    }
118 29c1a730 Kevin Wolf
119 29c1a730 Kevin Wolf
    return refcount;
120 f7d0fe02 Kevin Wolf
}
121 f7d0fe02 Kevin Wolf
122 05121aed Kevin Wolf
/*
123 05121aed Kevin Wolf
 * Rounds the refcount table size up to avoid growing the table for each single
124 05121aed Kevin Wolf
 * refcount block that is allocated.
125 05121aed Kevin Wolf
 */
126 05121aed Kevin Wolf
static unsigned int next_refcount_table_size(BDRVQcowState *s,
127 05121aed Kevin Wolf
    unsigned int min_size)
128 05121aed Kevin Wolf
{
129 05121aed Kevin Wolf
    unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
130 05121aed Kevin Wolf
    unsigned int refcount_table_clusters =
131 05121aed Kevin Wolf
        MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
132 05121aed Kevin Wolf
133 05121aed Kevin Wolf
    while (min_clusters > refcount_table_clusters) {
134 05121aed Kevin Wolf
        refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
135 05121aed Kevin Wolf
    }
136 05121aed Kevin Wolf
137 05121aed Kevin Wolf
    return refcount_table_clusters << (s->cluster_bits - 3);
138 05121aed Kevin Wolf
}
139 05121aed Kevin Wolf
140 92dcb59f Kevin Wolf
141 92dcb59f Kevin Wolf
/* Checks if two offsets are described by the same refcount block */
142 92dcb59f Kevin Wolf
static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
143 92dcb59f Kevin Wolf
    uint64_t offset_b)
144 92dcb59f Kevin Wolf
{
145 92dcb59f Kevin Wolf
    uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
146 92dcb59f Kevin Wolf
    uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
147 92dcb59f Kevin Wolf
148 92dcb59f Kevin Wolf
    return (block_a == block_b);
149 92dcb59f Kevin Wolf
}
150 92dcb59f Kevin Wolf
151 92dcb59f Kevin Wolf
/*
152 92dcb59f Kevin Wolf
 * Loads a refcount block. If it doesn't exist yet, it is allocated first
153 92dcb59f Kevin Wolf
 * (including growing the refcount table if needed).
154 92dcb59f Kevin Wolf
 *
155 29c1a730 Kevin Wolf
 * Returns 0 on success or -errno in error case
156 92dcb59f Kevin Wolf
 */
157 29c1a730 Kevin Wolf
static int alloc_refcount_block(BlockDriverState *bs,
158 29c1a730 Kevin Wolf
    int64_t cluster_index, uint16_t **refcount_block)
159 f7d0fe02 Kevin Wolf
{
160 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
161 92dcb59f Kevin Wolf
    unsigned int refcount_table_index;
162 92dcb59f Kevin Wolf
    int ret;
163 92dcb59f Kevin Wolf
164 66f82cee Kevin Wolf
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
165 8252278a Kevin Wolf
166 92dcb59f Kevin Wolf
    /* Find the refcount block for the given cluster */
167 92dcb59f Kevin Wolf
    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
168 92dcb59f Kevin Wolf
169 92dcb59f Kevin Wolf
    if (refcount_table_index < s->refcount_table_size) {
170 92dcb59f Kevin Wolf
171 92dcb59f Kevin Wolf
        uint64_t refcount_block_offset =
172 76dc9e0c Kevin Wolf
            s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
173 92dcb59f Kevin Wolf
174 92dcb59f Kevin Wolf
        /* If it's already there, we're done */
175 92dcb59f Kevin Wolf
        if (refcount_block_offset) {
176 29c1a730 Kevin Wolf
             return load_refcount_block(bs, refcount_block_offset,
177 29c1a730 Kevin Wolf
                 (void**) refcount_block);
178 92dcb59f Kevin Wolf
        }
179 92dcb59f Kevin Wolf
    }
180 92dcb59f Kevin Wolf
181 92dcb59f Kevin Wolf
    /*
182 92dcb59f Kevin Wolf
     * If we came here, we need to allocate something. Something is at least
183 92dcb59f Kevin Wolf
     * a cluster for the new refcount block. It may also include a new refcount
184 92dcb59f Kevin Wolf
     * table if the old refcount table is too small.
185 92dcb59f Kevin Wolf
     *
186 92dcb59f Kevin Wolf
     * Note that allocating clusters here needs some special care:
187 92dcb59f Kevin Wolf
     *
188 92dcb59f Kevin Wolf
     * - We can't use the normal qcow2_alloc_clusters(), it would try to
189 92dcb59f Kevin Wolf
     *   increase the refcount and very likely we would end up with an endless
190 92dcb59f Kevin Wolf
     *   recursion. Instead we must place the refcount blocks in a way that
191 92dcb59f Kevin Wolf
     *   they can describe them themselves.
192 92dcb59f Kevin Wolf
     *
193 92dcb59f Kevin Wolf
     * - We need to consider that at this point we are inside update_refcounts
194 92dcb59f Kevin Wolf
     *   and doing the initial refcount increase. This means that some clusters
195 92dcb59f Kevin Wolf
     *   have already been allocated by the caller, but their refcount isn't
196 92dcb59f Kevin Wolf
     *   accurate yet. free_cluster_index tells us where this allocation ends
197 92dcb59f Kevin Wolf
     *   as long as we don't overwrite it by freeing clusters.
198 92dcb59f Kevin Wolf
     *
199 92dcb59f Kevin Wolf
     * - alloc_clusters_noref and qcow2_free_clusters may load a different
200 92dcb59f Kevin Wolf
     *   refcount block into the cache
201 92dcb59f Kevin Wolf
     */
202 92dcb59f Kevin Wolf
203 29c1a730 Kevin Wolf
    *refcount_block = NULL;
204 29c1a730 Kevin Wolf
205 29c1a730 Kevin Wolf
    /* We write to the refcount table, so we might depend on L2 tables */
206 9991923b Stefan Hajnoczi
    ret = qcow2_cache_flush(bs, s->l2_table_cache);
207 9991923b Stefan Hajnoczi
    if (ret < 0) {
208 9991923b Stefan Hajnoczi
        return ret;
209 9991923b Stefan Hajnoczi
    }
210 92dcb59f Kevin Wolf
211 92dcb59f Kevin Wolf
    /* Allocate the refcount block itself and mark it as used */
212 2eaa8f63 Kevin Wolf
    int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
213 2eaa8f63 Kevin Wolf
    if (new_block < 0) {
214 2eaa8f63 Kevin Wolf
        return new_block;
215 2eaa8f63 Kevin Wolf
    }
216 f7d0fe02 Kevin Wolf
217 f7d0fe02 Kevin Wolf
#ifdef DEBUG_ALLOC2
218 92dcb59f Kevin Wolf
    fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
219 92dcb59f Kevin Wolf
        " at %" PRIx64 "\n",
220 92dcb59f Kevin Wolf
        refcount_table_index, cluster_index << s->cluster_bits, new_block);
221 f7d0fe02 Kevin Wolf
#endif
222 92dcb59f Kevin Wolf
223 92dcb59f Kevin Wolf
    if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
224 25408c09 Kevin Wolf
        /* Zero the new refcount block before updating it */
225 29c1a730 Kevin Wolf
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
226 29c1a730 Kevin Wolf
            (void**) refcount_block);
227 29c1a730 Kevin Wolf
        if (ret < 0) {
228 29c1a730 Kevin Wolf
            goto fail_block;
229 29c1a730 Kevin Wolf
        }
230 29c1a730 Kevin Wolf
231 29c1a730 Kevin Wolf
        memset(*refcount_block, 0, s->cluster_size);
232 25408c09 Kevin Wolf
233 92dcb59f Kevin Wolf
        /* The block describes itself, need to update the cache */
234 92dcb59f Kevin Wolf
        int block_index = (new_block >> s->cluster_bits) &
235 92dcb59f Kevin Wolf
            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
236 29c1a730 Kevin Wolf
        (*refcount_block)[block_index] = cpu_to_be16(1);
237 92dcb59f Kevin Wolf
    } else {
238 92dcb59f Kevin Wolf
        /* Described somewhere else. This can recurse at most twice before we
239 92dcb59f Kevin Wolf
         * arrive at a block that describes itself. */
240 6cfcb9b8 Kevin Wolf
        ret = update_refcount(bs, new_block, s->cluster_size, 1,
241 6cfcb9b8 Kevin Wolf
                              QCOW2_DISCARD_NEVER);
242 92dcb59f Kevin Wolf
        if (ret < 0) {
243 92dcb59f Kevin Wolf
            goto fail_block;
244 92dcb59f Kevin Wolf
        }
245 25408c09 Kevin Wolf
246 9991923b Stefan Hajnoczi
        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
247 9991923b Stefan Hajnoczi
        if (ret < 0) {
248 9991923b Stefan Hajnoczi
            goto fail_block;
249 9991923b Stefan Hajnoczi
        }
250 1c4c2814 Kevin Wolf
251 25408c09 Kevin Wolf
        /* Initialize the new refcount block only after updating its refcount,
252 25408c09 Kevin Wolf
         * update_refcount uses the refcount cache itself */
253 29c1a730 Kevin Wolf
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
254 29c1a730 Kevin Wolf
            (void**) refcount_block);
255 29c1a730 Kevin Wolf
        if (ret < 0) {
256 29c1a730 Kevin Wolf
            goto fail_block;
257 29c1a730 Kevin Wolf
        }
258 29c1a730 Kevin Wolf
259 29c1a730 Kevin Wolf
        memset(*refcount_block, 0, s->cluster_size);
260 92dcb59f Kevin Wolf
    }
261 92dcb59f Kevin Wolf
262 92dcb59f Kevin Wolf
    /* Now the new refcount block needs to be written to disk */
263 66f82cee Kevin Wolf
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
264 29c1a730 Kevin Wolf
    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
265 29c1a730 Kevin Wolf
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
266 92dcb59f Kevin Wolf
    if (ret < 0) {
267 92dcb59f Kevin Wolf
        goto fail_block;
268 92dcb59f Kevin Wolf
    }
269 92dcb59f Kevin Wolf
270 92dcb59f Kevin Wolf
    /* If the refcount table is big enough, just hook the block up there */
271 92dcb59f Kevin Wolf
    if (refcount_table_index < s->refcount_table_size) {
272 92dcb59f Kevin Wolf
        uint64_t data64 = cpu_to_be64(new_block);
273 66f82cee Kevin Wolf
        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
274 8b3b7206 Kevin Wolf
        ret = bdrv_pwrite_sync(bs->file,
275 92dcb59f Kevin Wolf
            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
276 92dcb59f Kevin Wolf
            &data64, sizeof(data64));
277 92dcb59f Kevin Wolf
        if (ret < 0) {
278 92dcb59f Kevin Wolf
            goto fail_block;
279 92dcb59f Kevin Wolf
        }
280 92dcb59f Kevin Wolf
281 92dcb59f Kevin Wolf
        s->refcount_table[refcount_table_index] = new_block;
282 29c1a730 Kevin Wolf
        return 0;
283 29c1a730 Kevin Wolf
    }
284 29c1a730 Kevin Wolf
285 29c1a730 Kevin Wolf
    ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
286 29c1a730 Kevin Wolf
    if (ret < 0) {
287 29c1a730 Kevin Wolf
        goto fail_block;
288 92dcb59f Kevin Wolf
    }
289 92dcb59f Kevin Wolf
290 92dcb59f Kevin Wolf
    /*
291 92dcb59f Kevin Wolf
     * If we come here, we need to grow the refcount table. Again, a new
292 92dcb59f Kevin Wolf
     * refcount table needs some space and we can't simply allocate to avoid
293 92dcb59f Kevin Wolf
     * endless recursion.
294 92dcb59f Kevin Wolf
     *
295 92dcb59f Kevin Wolf
     * Therefore let's grab new refcount blocks at the end of the image, which
296 92dcb59f Kevin Wolf
     * will describe themselves and the new refcount table. This way we can
297 92dcb59f Kevin Wolf
     * reference them only in the new table and do the switch to the new
298 92dcb59f Kevin Wolf
     * refcount table at once without producing an inconsistent state in
299 92dcb59f Kevin Wolf
     * between.
300 92dcb59f Kevin Wolf
     */
301 66f82cee Kevin Wolf
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
302 8252278a Kevin Wolf
303 92dcb59f Kevin Wolf
    /* Calculate the number of refcount blocks needed so far */
304 92dcb59f Kevin Wolf
    uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
305 92dcb59f Kevin Wolf
    uint64_t blocks_used = (s->free_cluster_index +
306 92dcb59f Kevin Wolf
        refcount_block_clusters - 1) / refcount_block_clusters;
307 92dcb59f Kevin Wolf
308 92dcb59f Kevin Wolf
    /* And now we need at least one block more for the new metadata */
309 92dcb59f Kevin Wolf
    uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
310 92dcb59f Kevin Wolf
    uint64_t last_table_size;
311 92dcb59f Kevin Wolf
    uint64_t blocks_clusters;
312 92dcb59f Kevin Wolf
    do {
313 a3548077 Kevin Wolf
        uint64_t table_clusters =
314 a3548077 Kevin Wolf
            size_to_clusters(s, table_size * sizeof(uint64_t));
315 92dcb59f Kevin Wolf
        blocks_clusters = 1 +
316 92dcb59f Kevin Wolf
            ((table_clusters + refcount_block_clusters - 1)
317 92dcb59f Kevin Wolf
            / refcount_block_clusters);
318 92dcb59f Kevin Wolf
        uint64_t meta_clusters = table_clusters + blocks_clusters;
319 92dcb59f Kevin Wolf
320 92dcb59f Kevin Wolf
        last_table_size = table_size;
321 92dcb59f Kevin Wolf
        table_size = next_refcount_table_size(s, blocks_used +
322 92dcb59f Kevin Wolf
            ((meta_clusters + refcount_block_clusters - 1)
323 92dcb59f Kevin Wolf
            / refcount_block_clusters));
324 92dcb59f Kevin Wolf
325 92dcb59f Kevin Wolf
    } while (last_table_size != table_size);
326 92dcb59f Kevin Wolf
327 92dcb59f Kevin Wolf
#ifdef DEBUG_ALLOC2
328 92dcb59f Kevin Wolf
    fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
329 92dcb59f Kevin Wolf
        s->refcount_table_size, table_size);
330 92dcb59f Kevin Wolf
#endif
331 92dcb59f Kevin Wolf
332 92dcb59f Kevin Wolf
    /* Create the new refcount table and blocks */
333 92dcb59f Kevin Wolf
    uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
334 92dcb59f Kevin Wolf
        s->cluster_size;
335 92dcb59f Kevin Wolf
    uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
336 7267c094 Anthony Liguori
    uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
337 7267c094 Anthony Liguori
    uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
338 92dcb59f Kevin Wolf
339 92dcb59f Kevin Wolf
    assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
340 92dcb59f Kevin Wolf
341 92dcb59f Kevin Wolf
    /* Fill the new refcount table */
342 f7d0fe02 Kevin Wolf
    memcpy(new_table, s->refcount_table,
343 92dcb59f Kevin Wolf
        s->refcount_table_size * sizeof(uint64_t));
344 92dcb59f Kevin Wolf
    new_table[refcount_table_index] = new_block;
345 92dcb59f Kevin Wolf
346 92dcb59f Kevin Wolf
    int i;
347 92dcb59f Kevin Wolf
    for (i = 0; i < blocks_clusters; i++) {
348 92dcb59f Kevin Wolf
        new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
349 92dcb59f Kevin Wolf
    }
350 92dcb59f Kevin Wolf
351 92dcb59f Kevin Wolf
    /* Fill the refcount blocks */
352 92dcb59f Kevin Wolf
    uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
353 92dcb59f Kevin Wolf
    int block = 0;
354 92dcb59f Kevin Wolf
    for (i = 0; i < table_clusters + blocks_clusters; i++) {
355 92dcb59f Kevin Wolf
        new_blocks[block++] = cpu_to_be16(1);
356 92dcb59f Kevin Wolf
    }
357 92dcb59f Kevin Wolf
358 92dcb59f Kevin Wolf
    /* Write refcount blocks to disk */
359 66f82cee Kevin Wolf
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
360 8b3b7206 Kevin Wolf
    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
361 92dcb59f Kevin Wolf
        blocks_clusters * s->cluster_size);
362 7267c094 Anthony Liguori
    g_free(new_blocks);
363 92dcb59f Kevin Wolf
    if (ret < 0) {
364 92dcb59f Kevin Wolf
        goto fail_table;
365 92dcb59f Kevin Wolf
    }
366 92dcb59f Kevin Wolf
367 92dcb59f Kevin Wolf
    /* Write refcount table to disk */
368 92dcb59f Kevin Wolf
    for(i = 0; i < table_size; i++) {
369 92dcb59f Kevin Wolf
        cpu_to_be64s(&new_table[i]);
370 92dcb59f Kevin Wolf
    }
371 92dcb59f Kevin Wolf
372 66f82cee Kevin Wolf
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
373 8b3b7206 Kevin Wolf
    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
374 92dcb59f Kevin Wolf
        table_size * sizeof(uint64_t));
375 92dcb59f Kevin Wolf
    if (ret < 0) {
376 92dcb59f Kevin Wolf
        goto fail_table;
377 92dcb59f Kevin Wolf
    }
378 92dcb59f Kevin Wolf
379 92dcb59f Kevin Wolf
    for(i = 0; i < table_size; i++) {
380 87267753 Zhi Yong Wu
        be64_to_cpus(&new_table[i]);
381 92dcb59f Kevin Wolf
    }
382 f7d0fe02 Kevin Wolf
383 92dcb59f Kevin Wolf
    /* Hook up the new refcount table in the qcow2 header */
384 92dcb59f Kevin Wolf
    uint8_t data[12];
385 f7d0fe02 Kevin Wolf
    cpu_to_be64w((uint64_t*)data, table_offset);
386 92dcb59f Kevin Wolf
    cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
387 66f82cee Kevin Wolf
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
388 8b3b7206 Kevin Wolf
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
389 92dcb59f Kevin Wolf
        data, sizeof(data));
390 92dcb59f Kevin Wolf
    if (ret < 0) {
391 92dcb59f Kevin Wolf
        goto fail_table;
392 f2b7c8b3 Kevin Wolf
    }
393 f2b7c8b3 Kevin Wolf
394 92dcb59f Kevin Wolf
    /* And switch it in memory */
395 92dcb59f Kevin Wolf
    uint64_t old_table_offset = s->refcount_table_offset;
396 92dcb59f Kevin Wolf
    uint64_t old_table_size = s->refcount_table_size;
397 92dcb59f Kevin Wolf
398 7267c094 Anthony Liguori
    g_free(s->refcount_table);
399 f7d0fe02 Kevin Wolf
    s->refcount_table = new_table;
400 92dcb59f Kevin Wolf
    s->refcount_table_size = table_size;
401 f7d0fe02 Kevin Wolf
    s->refcount_table_offset = table_offset;
402 f7d0fe02 Kevin Wolf
403 92dcb59f Kevin Wolf
    /* Free old table. Remember, we must not change free_cluster_index */
404 92dcb59f Kevin Wolf
    uint64_t old_free_cluster_index = s->free_cluster_index;
405 6cfcb9b8 Kevin Wolf
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
406 6cfcb9b8 Kevin Wolf
                        QCOW2_DISCARD_OTHER);
407 92dcb59f Kevin Wolf
    s->free_cluster_index = old_free_cluster_index;
408 f7d0fe02 Kevin Wolf
409 29c1a730 Kevin Wolf
    ret = load_refcount_block(bs, new_block, (void**) refcount_block);
410 92dcb59f Kevin Wolf
    if (ret < 0) {
411 29c1a730 Kevin Wolf
        return ret;
412 f7d0fe02 Kevin Wolf
    }
413 f7d0fe02 Kevin Wolf
414 2795ecf6 Kevin Wolf
    return 0;
415 f7d0fe02 Kevin Wolf
416 92dcb59f Kevin Wolf
fail_table:
417 7267c094 Anthony Liguori
    g_free(new_table);
418 92dcb59f Kevin Wolf
fail_block:
419 29c1a730 Kevin Wolf
    if (*refcount_block != NULL) {
420 29c1a730 Kevin Wolf
        qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
421 3b88e52b Kevin Wolf
    }
422 29c1a730 Kevin Wolf
    return ret;
423 9923e05e Kevin Wolf
}
424 9923e05e Kevin Wolf
425 0b919fae Kevin Wolf
/*
 * Drains the queue of pending discard requests (s->discards).
 *
 * Every queued region is unlinked from the queue and freed. If ret is
 * non-negative, the region is additionally passed to bdrv_discard() on
 * bs->file, with offset and size shifted by BDRV_SECTOR_BITS; with a
 * negative ret the regions are dropped without being discarded.
 */
void qcow2_process_discards(BlockDriverState *bs, int ret)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2DiscardRegion *region, *following;

    QTAILQ_FOREACH_SAFE(region, &s->discards, next, following) {
        QTAILQ_REMOVE(&s->discards, region, next);

        if (ret < 0) {
            /* Negative ret: just forget the request */
            g_free(region);
            continue;
        }

        /* Discard is optional, ignore the return value */
        bdrv_discard(bs->file,
                     region->offset >> BDRV_SECTOR_BITS,
                     region->bytes >> BDRV_SECTOR_BITS);
        g_free(region);
    }
}
443 0b919fae Kevin Wolf
444 0b919fae Kevin Wolf
/*
 * Queues the byte range starting at offset with the given length in
 * s->discards.
 *
 * If the range is adjacent to an already queued region, that region is
 * extended; otherwise a new region is appended to the queue. Afterwards any
 * other queued regions that have become adjacent to the resulting region are
 * merged into it and released.
 *
 * Overlapping ranges are not expected and trigger an assertion.
 */
static void update_refcount_discard(BlockDriverState *bs,
                                    uint64_t offset, uint64_t length)
{
    BDRVQcowState *s = bs->opaque;
    Qcow2DiscardRegion *d, *p, *next;

    QTAILQ_FOREACH(d, &s->discards, next) {
        uint64_t new_start = MIN(offset, d->offset);
        uint64_t new_end = MAX(offset + length, d->offset + d->bytes);

        /* The union is no larger than the two sizes added up, i.e. there is
         * no gap between the new range and d */
        if (new_end - new_start <= length + d->bytes) {
            /* There can't be any overlap, areas ending up here have no
             * references any more and therefore shouldn't get freed another
             * time. */
            assert(d->bytes + length == new_end - new_start);
            d->offset = new_start;
            d->bytes = new_end - new_start;
            goto found;
        }
    }

    d = g_malloc(sizeof(*d));
    *d = (Qcow2DiscardRegion) {
        .bs     = bs,
        .offset = offset,
        .bytes  = length,
    };
    QTAILQ_INSERT_TAIL(&s->discards, d, next);

found:
    /* Merge discard requests if they are adjacent now */
    QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
        if (p == d
            || p->offset > d->offset + d->bytes
            || d->offset > p->offset + p->bytes)
        {
            continue;
        }

        /* Still no overlap possible */
        assert(p->offset == d->offset + d->bytes
            || d->offset == p->offset + p->bytes);

        QTAILQ_REMOVE(&s->discards, p, next);
        d->offset = MIN(d->offset, p->offset);
        d->bytes += p->bytes;

        /* p is unlinked and fully merged into d; free it so it does not
         * leak. The _SAFE iteration has already fetched the successor. */
        g_free(p);
    }
}
492 0b919fae Kevin Wolf
493 f7d0fe02 Kevin Wolf
/* XXX: cache several refcount block clusters ? */
494 db3a964f Kevin Wolf
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
495 6cfcb9b8 Kevin Wolf
    int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
496 f7d0fe02 Kevin Wolf
{
497 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
498 f7d0fe02 Kevin Wolf
    int64_t start, last, cluster_offset;
499 29c1a730 Kevin Wolf
    uint16_t *refcount_block = NULL;
500 29c1a730 Kevin Wolf
    int64_t old_table_index = -1;
501 09508d13 Kevin Wolf
    int ret;
502 f7d0fe02 Kevin Wolf
503 f7d0fe02 Kevin Wolf
#ifdef DEBUG_ALLOC2
504 35ee5e39 Frediano Ziglio
    fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
505 f7d0fe02 Kevin Wolf
           offset, length, addend);
506 f7d0fe02 Kevin Wolf
#endif
507 7322afe7 Kevin Wolf
    if (length < 0) {
508 f7d0fe02 Kevin Wolf
        return -EINVAL;
509 7322afe7 Kevin Wolf
    } else if (length == 0) {
510 7322afe7 Kevin Wolf
        return 0;
511 7322afe7 Kevin Wolf
    }
512 7322afe7 Kevin Wolf
513 29c1a730 Kevin Wolf
    if (addend < 0) {
514 29c1a730 Kevin Wolf
        qcow2_cache_set_dependency(bs, s->refcount_block_cache,
515 29c1a730 Kevin Wolf
            s->l2_table_cache);
516 29c1a730 Kevin Wolf
    }
517 29c1a730 Kevin Wolf
518 f7d0fe02 Kevin Wolf
    start = offset & ~(s->cluster_size - 1);
519 f7d0fe02 Kevin Wolf
    last = (offset + length - 1) & ~(s->cluster_size - 1);
520 f7d0fe02 Kevin Wolf
    for(cluster_offset = start; cluster_offset <= last;
521 f7d0fe02 Kevin Wolf
        cluster_offset += s->cluster_size)
522 f7d0fe02 Kevin Wolf
    {
523 f7d0fe02 Kevin Wolf
        int block_index, refcount;
524 f7d0fe02 Kevin Wolf
        int64_t cluster_index = cluster_offset >> s->cluster_bits;
525 29c1a730 Kevin Wolf
        int64_t table_index =
526 29c1a730 Kevin Wolf
            cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
527 f7d0fe02 Kevin Wolf
528 29c1a730 Kevin Wolf
        /* Load the refcount block and allocate it if needed */
529 29c1a730 Kevin Wolf
        if (table_index != old_table_index) {
530 29c1a730 Kevin Wolf
            if (refcount_block) {
531 29c1a730 Kevin Wolf
                ret = qcow2_cache_put(bs, s->refcount_block_cache,
532 29c1a730 Kevin Wolf
                    (void**) &refcount_block);
533 29c1a730 Kevin Wolf
                if (ret < 0) {
534 29c1a730 Kevin Wolf
                    goto fail;
535 29c1a730 Kevin Wolf
                }
536 29c1a730 Kevin Wolf
            }
537 9923e05e Kevin Wolf
538 29c1a730 Kevin Wolf
            ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
539 ed0df867 Kevin Wolf
            if (ret < 0) {
540 29c1a730 Kevin Wolf
                goto fail;
541 f7d0fe02 Kevin Wolf
            }
542 f7d0fe02 Kevin Wolf
        }
543 29c1a730 Kevin Wolf
        old_table_index = table_index;
544 f7d0fe02 Kevin Wolf
545 29c1a730 Kevin Wolf
        qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
546 f7d0fe02 Kevin Wolf
547 f7d0fe02 Kevin Wolf
        /* we can update the count and save it */
548 f7d0fe02 Kevin Wolf
        block_index = cluster_index &
549 f7d0fe02 Kevin Wolf
            ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
550 f7d0fe02 Kevin Wolf
551 29c1a730 Kevin Wolf
        refcount = be16_to_cpu(refcount_block[block_index]);
552 f7d0fe02 Kevin Wolf
        refcount += addend;
553 09508d13 Kevin Wolf
        if (refcount < 0 || refcount > 0xffff) {
554 09508d13 Kevin Wolf
            ret = -EINVAL;
555 09508d13 Kevin Wolf
            goto fail;
556 09508d13 Kevin Wolf
        }
557 f7d0fe02 Kevin Wolf
        if (refcount == 0 && cluster_index < s->free_cluster_index) {
558 f7d0fe02 Kevin Wolf
            s->free_cluster_index = cluster_index;
559 f7d0fe02 Kevin Wolf
        }
560 29c1a730 Kevin Wolf
        refcount_block[block_index] = cpu_to_be16(refcount);
561 0b919fae Kevin Wolf
562 67af674e Kevin Wolf
        if (refcount == 0 && s->discard_passthrough[type]) {
563 0b919fae Kevin Wolf
            update_refcount_discard(bs, cluster_offset, s->cluster_size);
564 67af674e Kevin Wolf
        }
565 f7d0fe02 Kevin Wolf
    }
566 f7d0fe02 Kevin Wolf
567 09508d13 Kevin Wolf
    ret = 0;
568 09508d13 Kevin Wolf
fail:
569 0b919fae Kevin Wolf
    if (!s->cache_discards) {
570 0b919fae Kevin Wolf
        qcow2_process_discards(bs, ret);
571 0b919fae Kevin Wolf
    }
572 0b919fae Kevin Wolf
573 f7d0fe02 Kevin Wolf
    /* Write last changed block to disk */
574 29c1a730 Kevin Wolf
    if (refcount_block) {
575 ed0df867 Kevin Wolf
        int wret;
576 29c1a730 Kevin Wolf
        wret = qcow2_cache_put(bs, s->refcount_block_cache,
577 29c1a730 Kevin Wolf
            (void**) &refcount_block);
578 ed0df867 Kevin Wolf
        if (wret < 0) {
579 ed0df867 Kevin Wolf
            return ret < 0 ? ret : wret;
580 f7d0fe02 Kevin Wolf
        }
581 f7d0fe02 Kevin Wolf
    }
582 f7d0fe02 Kevin Wolf
583 09508d13 Kevin Wolf
    /*
584 09508d13 Kevin Wolf
     * Try do undo any updates if an error is returned (This may succeed in
585 09508d13 Kevin Wolf
     * some cases like ENOSPC for allocating a new refcount block)
586 09508d13 Kevin Wolf
     */
587 09508d13 Kevin Wolf
    if (ret < 0) {
588 09508d13 Kevin Wolf
        int dummy;
589 6cfcb9b8 Kevin Wolf
        dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
590 6cfcb9b8 Kevin Wolf
                                QCOW2_DISCARD_NEVER);
591 83e3f76c Blue Swirl
        (void)dummy;
592 09508d13 Kevin Wolf
    }
593 09508d13 Kevin Wolf
594 09508d13 Kevin Wolf
    return ret;
595 f7d0fe02 Kevin Wolf
}
596 f7d0fe02 Kevin Wolf
597 018faafd Kevin Wolf
/*
598 018faafd Kevin Wolf
 * Increases or decreases the refcount of a given cluster by one.
599 018faafd Kevin Wolf
 * addend must be 1 or -1.
600 018faafd Kevin Wolf
 *
601 018faafd Kevin Wolf
 * If the return value is non-negative, it is the new refcount of the cluster.
602 018faafd Kevin Wolf
 * If it is negative, it is -errno and indicates an error.
603 018faafd Kevin Wolf
 */
604 32b6444d Max Reitz
int qcow2_update_cluster_refcount(BlockDriverState *bs,
605 32b6444d Max Reitz
                                  int64_t cluster_index,
606 32b6444d Max Reitz
                                  int addend,
607 32b6444d Max Reitz
                                  enum qcow2_discard_type type)
608 f7d0fe02 Kevin Wolf
{
609 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
610 f7d0fe02 Kevin Wolf
    int ret;
611 f7d0fe02 Kevin Wolf
612 6cfcb9b8 Kevin Wolf
    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
613 6cfcb9b8 Kevin Wolf
                          type);
614 f7d0fe02 Kevin Wolf
    if (ret < 0) {
615 f7d0fe02 Kevin Wolf
        return ret;
616 f7d0fe02 Kevin Wolf
    }
617 f7d0fe02 Kevin Wolf
618 f7d0fe02 Kevin Wolf
    return get_refcount(bs, cluster_index);
619 f7d0fe02 Kevin Wolf
}
620 f7d0fe02 Kevin Wolf
621 f7d0fe02 Kevin Wolf
622 f7d0fe02 Kevin Wolf
623 f7d0fe02 Kevin Wolf
/*********************************************************/
624 f7d0fe02 Kevin Wolf
/* cluster allocation functions */
625 f7d0fe02 Kevin Wolf
626 f7d0fe02 Kevin Wolf
627 f7d0fe02 Kevin Wolf
628 f7d0fe02 Kevin Wolf
/* return < 0 if error */
629 f7d0fe02 Kevin Wolf
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
630 f7d0fe02 Kevin Wolf
{
631 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
632 2eaa8f63 Kevin Wolf
    int i, nb_clusters, refcount;
633 f7d0fe02 Kevin Wolf
634 f7d0fe02 Kevin Wolf
    nb_clusters = size_to_clusters(s, size);
635 f7d0fe02 Kevin Wolf
retry:
636 f7d0fe02 Kevin Wolf
    for(i = 0; i < nb_clusters; i++) {
637 508e0893 Stefan Hajnoczi
        int64_t next_cluster_index = s->free_cluster_index++;
638 2eaa8f63 Kevin Wolf
        refcount = get_refcount(bs, next_cluster_index);
639 2eaa8f63 Kevin Wolf
640 2eaa8f63 Kevin Wolf
        if (refcount < 0) {
641 2eaa8f63 Kevin Wolf
            return refcount;
642 2eaa8f63 Kevin Wolf
        } else if (refcount != 0) {
643 f7d0fe02 Kevin Wolf
            goto retry;
644 2eaa8f63 Kevin Wolf
        }
645 f7d0fe02 Kevin Wolf
    }
646 f7d0fe02 Kevin Wolf
#ifdef DEBUG_ALLOC2
647 35ee5e39 Frediano Ziglio
    fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
648 f7d0fe02 Kevin Wolf
            size,
649 f7d0fe02 Kevin Wolf
            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
650 f7d0fe02 Kevin Wolf
#endif
651 f7d0fe02 Kevin Wolf
    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
652 f7d0fe02 Kevin Wolf
}
653 f7d0fe02 Kevin Wolf
654 ed6ccf0f Kevin Wolf
/*
 * Allocates enough contiguous clusters to hold size bytes: a free run is
 * located with alloc_clusters_noref() and its refcounts are then increased
 * by one.
 *
 * Returns the byte offset of the allocation, or a negative errno value.
 */
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
{
    int64_t first_offset;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);

    first_offset = alloc_clusters_noref(bs, size);
    if (first_offset >= 0) {
        int status = update_refcount(bs, first_offset, size, 1,
                                     QCOW2_DISCARD_NEVER);
        if (status < 0) {
            first_offset = status;
        }
    }

    return first_offset;
}
672 f7d0fe02 Kevin Wolf
673 256900b1 Kevin Wolf
/*
 * Tries to allocate up to nb_clusters consecutive clusters beginning at the
 * byte offset `offset`.
 *
 * The clusters are examined in order; the scan stops at the first one whose
 * refcount is non-zero, and only the free clusters before it are allocated
 * (their refcount is increased by one).
 *
 * Returns the number of clusters that were allocated, which may be smaller
 * than nb_clusters (including 0), or a negative errno value on failure.
 * s->free_cluster_index is left unchanged on both success and failure.
 */
int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
    int nb_clusters)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t first_cluster = offset >> s->cluster_bits;
    uint64_t old_free_cluster_index;
    int i, refcount, ret;

    /* Check how many clusters there are free */
    for (i = 0; i < nb_clusters; i++) {
        refcount = get_refcount(bs, first_cluster + i);

        if (refcount < 0) {
            return refcount;
        } else if (refcount != 0) {
            break;
        }
    }

    /*
     * And then allocate them. While the refcounts are being updated, point
     * the allocation hint just behind the range being claimed so that any
     * allocation triggered by update_refcount() does not start inside it.
     */
    old_free_cluster_index = s->free_cluster_index;
    s->free_cluster_index = first_cluster + i;

    /* Widen before shifting: i << cluster_bits can overflow an int */
    ret = update_refcount(bs, offset, (int64_t)i << s->cluster_bits, 1,
                          QCOW2_DISCARD_NEVER);

    /* Restore the hint on every path, including the error path */
    s->free_cluster_index = old_free_cluster_index;

    if (ret < 0) {
        return ret;
    }

    return i;
}
707 256900b1 Kevin Wolf
708 f7d0fe02 Kevin Wolf
/* only used to allocate compressed sectors. We try to allocate
709 f7d0fe02 Kevin Wolf
   contiguous sectors. size must be <= cluster_size */
710 ed6ccf0f Kevin Wolf
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
711 f7d0fe02 Kevin Wolf
{
712 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
713 f7d0fe02 Kevin Wolf
    int64_t offset, cluster_offset;
714 f7d0fe02 Kevin Wolf
    int free_in_cluster;
715 f7d0fe02 Kevin Wolf
716 66f82cee Kevin Wolf
    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
717 f7d0fe02 Kevin Wolf
    assert(size > 0 && size <= s->cluster_size);
718 f7d0fe02 Kevin Wolf
    if (s->free_byte_offset == 0) {
719 206e6d85 Stefan Hajnoczi
        offset = qcow2_alloc_clusters(bs, s->cluster_size);
720 206e6d85 Stefan Hajnoczi
        if (offset < 0) {
721 206e6d85 Stefan Hajnoczi
            return offset;
722 5d757b56 Kevin Wolf
        }
723 206e6d85 Stefan Hajnoczi
        s->free_byte_offset = offset;
724 f7d0fe02 Kevin Wolf
    }
725 f7d0fe02 Kevin Wolf
 redo:
726 f7d0fe02 Kevin Wolf
    free_in_cluster = s->cluster_size -
727 f7d0fe02 Kevin Wolf
        (s->free_byte_offset & (s->cluster_size - 1));
728 f7d0fe02 Kevin Wolf
    if (size <= free_in_cluster) {
729 f7d0fe02 Kevin Wolf
        /* enough space in current cluster */
730 f7d0fe02 Kevin Wolf
        offset = s->free_byte_offset;
731 f7d0fe02 Kevin Wolf
        s->free_byte_offset += size;
732 f7d0fe02 Kevin Wolf
        free_in_cluster -= size;
733 f7d0fe02 Kevin Wolf
        if (free_in_cluster == 0)
734 f7d0fe02 Kevin Wolf
            s->free_byte_offset = 0;
735 f7d0fe02 Kevin Wolf
        if ((offset & (s->cluster_size - 1)) != 0)
736 32b6444d Max Reitz
            qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
737 32b6444d Max Reitz
                                          QCOW2_DISCARD_NEVER);
738 f7d0fe02 Kevin Wolf
    } else {
739 ed6ccf0f Kevin Wolf
        offset = qcow2_alloc_clusters(bs, s->cluster_size);
740 5d757b56 Kevin Wolf
        if (offset < 0) {
741 5d757b56 Kevin Wolf
            return offset;
742 5d757b56 Kevin Wolf
        }
743 f7d0fe02 Kevin Wolf
        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
744 f7d0fe02 Kevin Wolf
        if ((cluster_offset + s->cluster_size) == offset) {
745 f7d0fe02 Kevin Wolf
            /* we are lucky: contiguous data */
746 f7d0fe02 Kevin Wolf
            offset = s->free_byte_offset;
747 32b6444d Max Reitz
            qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
748 32b6444d Max Reitz
                                          QCOW2_DISCARD_NEVER);
749 f7d0fe02 Kevin Wolf
            s->free_byte_offset += size;
750 f7d0fe02 Kevin Wolf
        } else {
751 f7d0fe02 Kevin Wolf
            s->free_byte_offset = offset;
752 f7d0fe02 Kevin Wolf
            goto redo;
753 f7d0fe02 Kevin Wolf
        }
754 f7d0fe02 Kevin Wolf
    }
755 29216ed1 Kevin Wolf
756 c1f5bafd Stefan Hajnoczi
    /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
757 32b6444d Max Reitz
     * or explicitly by qcow2_update_cluster_refcount().  Refcount blocks must
758 32b6444d Max Reitz
     * be flushed before the caller's L2 table updates.
759 c1f5bafd Stefan Hajnoczi
     */
760 c1f5bafd Stefan Hajnoczi
    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
761 f7d0fe02 Kevin Wolf
    return offset;
762 f7d0fe02 Kevin Wolf
}
763 f7d0fe02 Kevin Wolf
764 ed6ccf0f Kevin Wolf
/*
 * Decreases by one the refcount of every cluster touched by the byte range
 * [offset, offset + size). type is passed on to update_refcount() as the
 * discard type.
 *
 * Errors cannot be returned to the caller; they are only reported on stderr.
 */
void qcow2_free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size,
                          enum qcow2_discard_type type)
{
    int status;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);

    status = update_refcount(bs, offset, size, -1, type);
    if (status >= 0) {
        return;
    }

    fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-status));
    /* TODO Remember the clusters to free them later and avoid leaking */
}
777 f7d0fe02 Kevin Wolf
778 45aba42f Kevin Wolf
/*
779 c7a4c37a Kevin Wolf
 * Free a cluster using its L2 entry (handles clusters of all types, e.g.
780 c7a4c37a Kevin Wolf
 * normal cluster, compressed cluster, etc.)
781 45aba42f Kevin Wolf
 */
782 6cfcb9b8 Kevin Wolf
void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
783 6cfcb9b8 Kevin Wolf
                             int nb_clusters, enum qcow2_discard_type type)
784 45aba42f Kevin Wolf
{
785 45aba42f Kevin Wolf
    BDRVQcowState *s = bs->opaque;
786 45aba42f Kevin Wolf
787 c7a4c37a Kevin Wolf
    switch (qcow2_get_cluster_type(l2_entry)) {
788 c7a4c37a Kevin Wolf
    case QCOW2_CLUSTER_COMPRESSED:
789 c7a4c37a Kevin Wolf
        {
790 c7a4c37a Kevin Wolf
            int nb_csectors;
791 c7a4c37a Kevin Wolf
            nb_csectors = ((l2_entry >> s->csize_shift) &
792 c7a4c37a Kevin Wolf
                           s->csize_mask) + 1;
793 c7a4c37a Kevin Wolf
            qcow2_free_clusters(bs,
794 c7a4c37a Kevin Wolf
                (l2_entry & s->cluster_offset_mask) & ~511,
795 6cfcb9b8 Kevin Wolf
                nb_csectors * 512, type);
796 c7a4c37a Kevin Wolf
        }
797 c7a4c37a Kevin Wolf
        break;
798 c7a4c37a Kevin Wolf
    case QCOW2_CLUSTER_NORMAL:
799 8f730dd2 Max Reitz
    case QCOW2_CLUSTER_ZERO:
800 8f730dd2 Max Reitz
        if (l2_entry & L2E_OFFSET_MASK) {
801 8f730dd2 Max Reitz
            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
802 8f730dd2 Max Reitz
                                nb_clusters << s->cluster_bits, type);
803 8f730dd2 Max Reitz
        }
804 c7a4c37a Kevin Wolf
        break;
805 c7a4c37a Kevin Wolf
    case QCOW2_CLUSTER_UNALLOCATED:
806 c7a4c37a Kevin Wolf
        break;
807 c7a4c37a Kevin Wolf
    default:
808 c7a4c37a Kevin Wolf
        abort();
809 45aba42f Kevin Wolf
    }
810 45aba42f Kevin Wolf
}
811 45aba42f Kevin Wolf
812 f7d0fe02 Kevin Wolf
813 f7d0fe02 Kevin Wolf
814 f7d0fe02 Kevin Wolf
/*********************************************************/
815 f7d0fe02 Kevin Wolf
/* snapshots and image creation */
816 f7d0fe02 Kevin Wolf
817 f7d0fe02 Kevin Wolf
818 f7d0fe02 Kevin Wolf
819 f7d0fe02 Kevin Wolf
/* update the refcounts of snapshots and the copied flag */
820 ed6ccf0f Kevin Wolf
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
821 ed6ccf0f Kevin Wolf
    int64_t l1_table_offset, int l1_size, int addend)
822 f7d0fe02 Kevin Wolf
{
823 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
824 f7d0fe02 Kevin Wolf
    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
825 f7d0fe02 Kevin Wolf
    int64_t old_offset, old_l2_offset;
826 93913dfd Kevin Wolf
    int i, j, l1_modified = 0, nb_csectors, refcount;
827 29c1a730 Kevin Wolf
    int ret;
828 f7d0fe02 Kevin Wolf
829 f7d0fe02 Kevin Wolf
    l2_table = NULL;
830 f7d0fe02 Kevin Wolf
    l1_table = NULL;
831 f7d0fe02 Kevin Wolf
    l1_size2 = l1_size * sizeof(uint64_t);
832 43a0cac4 Kevin Wolf
833 0b919fae Kevin Wolf
    s->cache_discards = true;
834 0b919fae Kevin Wolf
835 43a0cac4 Kevin Wolf
    /* WARNING: qcow2_snapshot_goto relies on this function not using the
836 43a0cac4 Kevin Wolf
     * l1_table_offset when it is the current s->l1_table_offset! Be careful
837 43a0cac4 Kevin Wolf
     * when changing this! */
838 f7d0fe02 Kevin Wolf
    if (l1_table_offset != s->l1_table_offset) {
839 6528499f Markus Armbruster
        l1_table = g_malloc0(align_offset(l1_size2, 512));
840 f7d0fe02 Kevin Wolf
        l1_allocated = 1;
841 c2bc78b6 Kevin Wolf
842 c2bc78b6 Kevin Wolf
        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
843 c2bc78b6 Kevin Wolf
        if (ret < 0) {
844 f7d0fe02 Kevin Wolf
            goto fail;
845 93913dfd Kevin Wolf
        }
846 93913dfd Kevin Wolf
847 f7d0fe02 Kevin Wolf
        for(i = 0;i < l1_size; i++)
848 f7d0fe02 Kevin Wolf
            be64_to_cpus(&l1_table[i]);
849 f7d0fe02 Kevin Wolf
    } else {
850 f7d0fe02 Kevin Wolf
        assert(l1_size == s->l1_size);
851 f7d0fe02 Kevin Wolf
        l1_table = s->l1_table;
852 f7d0fe02 Kevin Wolf
        l1_allocated = 0;
853 f7d0fe02 Kevin Wolf
    }
854 f7d0fe02 Kevin Wolf
855 f7d0fe02 Kevin Wolf
    for(i = 0; i < l1_size; i++) {
856 f7d0fe02 Kevin Wolf
        l2_offset = l1_table[i];
857 f7d0fe02 Kevin Wolf
        if (l2_offset) {
858 f7d0fe02 Kevin Wolf
            old_l2_offset = l2_offset;
859 8e37f681 Kevin Wolf
            l2_offset &= L1E_OFFSET_MASK;
860 29c1a730 Kevin Wolf
861 29c1a730 Kevin Wolf
            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
862 29c1a730 Kevin Wolf
                (void**) &l2_table);
863 29c1a730 Kevin Wolf
            if (ret < 0) {
864 f7d0fe02 Kevin Wolf
                goto fail;
865 29c1a730 Kevin Wolf
            }
866 29c1a730 Kevin Wolf
867 f7d0fe02 Kevin Wolf
            for(j = 0; j < s->l2_size; j++) {
868 8b81a7b6 Max Reitz
                uint64_t cluster_index;
869 8b81a7b6 Max Reitz
870 f7d0fe02 Kevin Wolf
                offset = be64_to_cpu(l2_table[j]);
871 8b81a7b6 Max Reitz
                old_offset = offset;
872 8b81a7b6 Max Reitz
                offset &= ~QCOW_OFLAG_COPIED;
873 8b81a7b6 Max Reitz
874 8b81a7b6 Max Reitz
                switch (qcow2_get_cluster_type(offset)) {
875 8b81a7b6 Max Reitz
                    case QCOW2_CLUSTER_COMPRESSED:
876 f7d0fe02 Kevin Wolf
                        nb_csectors = ((offset >> s->csize_shift) &
877 f7d0fe02 Kevin Wolf
                                       s->csize_mask) + 1;
878 db3a964f Kevin Wolf
                        if (addend != 0) {
879 db3a964f Kevin Wolf
                            ret = update_refcount(bs,
880 db3a964f Kevin Wolf
                                (offset & s->cluster_offset_mask) & ~511,
881 6cfcb9b8 Kevin Wolf
                                nb_csectors * 512, addend,
882 6cfcb9b8 Kevin Wolf
                                QCOW2_DISCARD_SNAPSHOT);
883 db3a964f Kevin Wolf
                            if (ret < 0) {
884 db3a964f Kevin Wolf
                                goto fail;
885 db3a964f Kevin Wolf
                            }
886 db3a964f Kevin Wolf
                        }
887 f7d0fe02 Kevin Wolf
                        /* compressed clusters are never modified */
888 f7d0fe02 Kevin Wolf
                        refcount = 2;
889 8b81a7b6 Max Reitz
                        break;
890 8b81a7b6 Max Reitz
891 8b81a7b6 Max Reitz
                    case QCOW2_CLUSTER_NORMAL:
892 8b81a7b6 Max Reitz
                    case QCOW2_CLUSTER_ZERO:
893 8b81a7b6 Max Reitz
                        cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
894 8b81a7b6 Max Reitz
                        if (!cluster_index) {
895 8b81a7b6 Max Reitz
                            /* unallocated */
896 8b81a7b6 Max Reitz
                            refcount = 0;
897 8b81a7b6 Max Reitz
                            break;
898 8b81a7b6 Max Reitz
                        }
899 f7d0fe02 Kevin Wolf
                        if (addend != 0) {
900 32b6444d Max Reitz
                            refcount = qcow2_update_cluster_refcount(bs,
901 32b6444d Max Reitz
                                    cluster_index, addend,
902 32b6444d Max Reitz
                                    QCOW2_DISCARD_SNAPSHOT);
903 f7d0fe02 Kevin Wolf
                        } else {
904 8e37f681 Kevin Wolf
                            refcount = get_refcount(bs, cluster_index);
905 f7d0fe02 Kevin Wolf
                        }
906 018faafd Kevin Wolf
907 018faafd Kevin Wolf
                        if (refcount < 0) {
908 c2bc78b6 Kevin Wolf
                            ret = refcount;
909 018faafd Kevin Wolf
                            goto fail;
910 018faafd Kevin Wolf
                        }
911 8b81a7b6 Max Reitz
                        break;
912 f7d0fe02 Kevin Wolf
913 8b81a7b6 Max Reitz
                    case QCOW2_CLUSTER_UNALLOCATED:
914 8b81a7b6 Max Reitz
                        refcount = 0;
915 8b81a7b6 Max Reitz
                        break;
916 8b81a7b6 Max Reitz
917 8b81a7b6 Max Reitz
                    default:
918 8b81a7b6 Max Reitz
                        abort();
919 8b81a7b6 Max Reitz
                }
920 8b81a7b6 Max Reitz
921 8b81a7b6 Max Reitz
                if (refcount == 1) {
922 8b81a7b6 Max Reitz
                    offset |= QCOW_OFLAG_COPIED;
923 8b81a7b6 Max Reitz
                }
924 8b81a7b6 Max Reitz
                if (offset != old_offset) {
925 8b81a7b6 Max Reitz
                    if (addend > 0) {
926 8b81a7b6 Max Reitz
                        qcow2_cache_set_dependency(bs, s->l2_table_cache,
927 8b81a7b6 Max Reitz
                            s->refcount_block_cache);
928 f7d0fe02 Kevin Wolf
                    }
929 8b81a7b6 Max Reitz
                    l2_table[j] = cpu_to_be64(offset);
930 8b81a7b6 Max Reitz
                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
931 f7d0fe02 Kevin Wolf
                }
932 f7d0fe02 Kevin Wolf
            }
933 29c1a730 Kevin Wolf
934 29c1a730 Kevin Wolf
            ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
935 29c1a730 Kevin Wolf
            if (ret < 0) {
936 29c1a730 Kevin Wolf
                goto fail;
937 f7d0fe02 Kevin Wolf
            }
938 f7d0fe02 Kevin Wolf
939 29c1a730 Kevin Wolf
940 f7d0fe02 Kevin Wolf
            if (addend != 0) {
941 32b6444d Max Reitz
                refcount = qcow2_update_cluster_refcount(bs, l2_offset >>
942 32b6444d Max Reitz
                        s->cluster_bits, addend, QCOW2_DISCARD_SNAPSHOT);
943 f7d0fe02 Kevin Wolf
            } else {
944 f7d0fe02 Kevin Wolf
                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
945 f7d0fe02 Kevin Wolf
            }
946 018faafd Kevin Wolf
            if (refcount < 0) {
947 c2bc78b6 Kevin Wolf
                ret = refcount;
948 018faafd Kevin Wolf
                goto fail;
949 018faafd Kevin Wolf
            } else if (refcount == 1) {
950 f7d0fe02 Kevin Wolf
                l2_offset |= QCOW_OFLAG_COPIED;
951 f7d0fe02 Kevin Wolf
            }
952 f7d0fe02 Kevin Wolf
            if (l2_offset != old_l2_offset) {
953 f7d0fe02 Kevin Wolf
                l1_table[i] = l2_offset;
954 f7d0fe02 Kevin Wolf
                l1_modified = 1;
955 f7d0fe02 Kevin Wolf
            }
956 f7d0fe02 Kevin Wolf
        }
957 f7d0fe02 Kevin Wolf
    }
958 93913dfd Kevin Wolf
959 2154f24e Stefan Hajnoczi
    ret = bdrv_flush(bs);
960 93913dfd Kevin Wolf
fail:
961 93913dfd Kevin Wolf
    if (l2_table) {
962 93913dfd Kevin Wolf
        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
963 93913dfd Kevin Wolf
    }
964 93913dfd Kevin Wolf
965 0b919fae Kevin Wolf
    s->cache_discards = false;
966 0b919fae Kevin Wolf
    qcow2_process_discards(bs, ret);
967 0b919fae Kevin Wolf
968 43a0cac4 Kevin Wolf
    /* Update L1 only if it isn't deleted anyway (addend = -1) */
969 c2b6ff51 Kevin Wolf
    if (ret == 0 && addend >= 0 && l1_modified) {
970 c2b6ff51 Kevin Wolf
        for (i = 0; i < l1_size; i++) {
971 f7d0fe02 Kevin Wolf
            cpu_to_be64s(&l1_table[i]);
972 c2b6ff51 Kevin Wolf
        }
973 c2b6ff51 Kevin Wolf
974 c2b6ff51 Kevin Wolf
        ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
975 c2b6ff51 Kevin Wolf
976 c2b6ff51 Kevin Wolf
        for (i = 0; i < l1_size; i++) {
977 f7d0fe02 Kevin Wolf
            be64_to_cpus(&l1_table[i]);
978 c2b6ff51 Kevin Wolf
        }
979 f7d0fe02 Kevin Wolf
    }
980 f7d0fe02 Kevin Wolf
    if (l1_allocated)
981 7267c094 Anthony Liguori
        g_free(l1_table);
982 93913dfd Kevin Wolf
    return ret;
983 f7d0fe02 Kevin Wolf
}
984 f7d0fe02 Kevin Wolf
985 f7d0fe02 Kevin Wolf
986 f7d0fe02 Kevin Wolf
987 f7d0fe02 Kevin Wolf
988 f7d0fe02 Kevin Wolf
/*********************************************************/
989 f7d0fe02 Kevin Wolf
/* refcount checking functions */
990 f7d0fe02 Kevin Wolf
991 f7d0fe02 Kevin Wolf
992 f7d0fe02 Kevin Wolf
993 f7d0fe02 Kevin Wolf
/*
994 f7d0fe02 Kevin Wolf
 * Increases the refcount for a range of clusters in a given refcount table.
995 f7d0fe02 Kevin Wolf
 * This is used to construct a temporary refcount table out of L1 and L2 tables
996 f7d0fe02 Kevin Wolf
 * which can be compared the the refcount table saved in the image.
997 f7d0fe02 Kevin Wolf
 *
998 9ac228e0 Kevin Wolf
 * Modifies the number of errors in res.
999 f7d0fe02 Kevin Wolf
 */
1000 9ac228e0 Kevin Wolf
static void inc_refcounts(BlockDriverState *bs,
1001 9ac228e0 Kevin Wolf
                          BdrvCheckResult *res,
1002 f7d0fe02 Kevin Wolf
                          uint16_t *refcount_table,
1003 f7d0fe02 Kevin Wolf
                          int refcount_table_size,
1004 f7d0fe02 Kevin Wolf
                          int64_t offset, int64_t size)
1005 f7d0fe02 Kevin Wolf
{
1006 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
1007 f7d0fe02 Kevin Wolf
    int64_t start, last, cluster_offset;
1008 f7d0fe02 Kevin Wolf
    int k;
1009 f7d0fe02 Kevin Wolf
1010 f7d0fe02 Kevin Wolf
    if (size <= 0)
1011 9ac228e0 Kevin Wolf
        return;
1012 f7d0fe02 Kevin Wolf
1013 f7d0fe02 Kevin Wolf
    start = offset & ~(s->cluster_size - 1);
1014 f7d0fe02 Kevin Wolf
    last = (offset + size - 1) & ~(s->cluster_size - 1);
1015 f7d0fe02 Kevin Wolf
    for(cluster_offset = start; cluster_offset <= last;
1016 f7d0fe02 Kevin Wolf
        cluster_offset += s->cluster_size) {
1017 f7d0fe02 Kevin Wolf
        k = cluster_offset >> s->cluster_bits;
1018 9ac228e0 Kevin Wolf
        if (k < 0) {
1019 f7d0fe02 Kevin Wolf
            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
1020 f7d0fe02 Kevin Wolf
                cluster_offset);
1021 9ac228e0 Kevin Wolf
            res->corruptions++;
1022 9ac228e0 Kevin Wolf
        } else if (k >= refcount_table_size) {
1023 9ac228e0 Kevin Wolf
            fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
1024 9ac228e0 Kevin Wolf
                "the end of the image file, can't properly check refcounts.\n",
1025 9ac228e0 Kevin Wolf
                cluster_offset);
1026 9ac228e0 Kevin Wolf
            res->check_errors++;
1027 f7d0fe02 Kevin Wolf
        } else {
1028 f7d0fe02 Kevin Wolf
            if (++refcount_table[k] == 0) {
1029 f7d0fe02 Kevin Wolf
                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
1030 f7d0fe02 Kevin Wolf
                    "\n", cluster_offset);
1031 9ac228e0 Kevin Wolf
                res->corruptions++;
1032 f7d0fe02 Kevin Wolf
            }
1033 f7d0fe02 Kevin Wolf
        }
1034 f7d0fe02 Kevin Wolf
    }
1035 f7d0fe02 Kevin Wolf
}
1036 f7d0fe02 Kevin Wolf
1037 801f7044 Stefan Hajnoczi
/* Flag bits accepted by check_refcounts_l1() and check_refcounts_l2() */
enum {
    /* update BlockFragInfo counters */
    CHECK_FRAG_INFO = 1 << 1,
};
1041 801f7044 Stefan Hajnoczi
1042 f7d0fe02 Kevin Wolf
/*
1043 f7d0fe02 Kevin Wolf
 * Increases the refcount in the given refcount table for the all clusters
1044 f7d0fe02 Kevin Wolf
 * referenced in the L2 table. While doing so, performs some checks on L2
1045 f7d0fe02 Kevin Wolf
 * entries.
1046 f7d0fe02 Kevin Wolf
 *
1047 f7d0fe02 Kevin Wolf
 * Returns the number of errors found by the checks or -errno if an internal
1048 f7d0fe02 Kevin Wolf
 * error occurred.
1049 f7d0fe02 Kevin Wolf
 */
1050 9ac228e0 Kevin Wolf
static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
1051 f7d0fe02 Kevin Wolf
    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
1052 801f7044 Stefan Hajnoczi
    int flags)
1053 f7d0fe02 Kevin Wolf
{
1054 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
1055 afdf0abe Kevin Wolf
    uint64_t *l2_table, l2_entry;
1056 fba31bae Stefan Hajnoczi
    uint64_t next_contiguous_offset = 0;
1057 4f6ed88c Max Reitz
    int i, l2_size, nb_csectors;
1058 f7d0fe02 Kevin Wolf
1059 f7d0fe02 Kevin Wolf
    /* Read L2 table from disk */
1060 f7d0fe02 Kevin Wolf
    l2_size = s->l2_size * sizeof(uint64_t);
1061 7267c094 Anthony Liguori
    l2_table = g_malloc(l2_size);
1062 f7d0fe02 Kevin Wolf
1063 66f82cee Kevin Wolf
    if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
1064 f7d0fe02 Kevin Wolf
        goto fail;
1065 f7d0fe02 Kevin Wolf
1066 f7d0fe02 Kevin Wolf
    /* Do the actual checks */
1067 f7d0fe02 Kevin Wolf
    for(i = 0; i < s->l2_size; i++) {
1068 afdf0abe Kevin Wolf
        l2_entry = be64_to_cpu(l2_table[i]);
1069 afdf0abe Kevin Wolf
1070 afdf0abe Kevin Wolf
        switch (qcow2_get_cluster_type(l2_entry)) {
1071 afdf0abe Kevin Wolf
        case QCOW2_CLUSTER_COMPRESSED:
1072 afdf0abe Kevin Wolf
            /* Compressed clusters don't have QCOW_OFLAG_COPIED */
1073 afdf0abe Kevin Wolf
            if (l2_entry & QCOW_OFLAG_COPIED) {
1074 afdf0abe Kevin Wolf
                fprintf(stderr, "ERROR: cluster %" PRId64 ": "
1075 afdf0abe Kevin Wolf
                    "copied flag must never be set for compressed "
1076 afdf0abe Kevin Wolf
                    "clusters\n", l2_entry >> s->cluster_bits);
1077 afdf0abe Kevin Wolf
                l2_entry &= ~QCOW_OFLAG_COPIED;
1078 afdf0abe Kevin Wolf
                res->corruptions++;
1079 afdf0abe Kevin Wolf
            }
1080 f7d0fe02 Kevin Wolf
1081 afdf0abe Kevin Wolf
            /* Mark cluster as used */
1082 afdf0abe Kevin Wolf
            nb_csectors = ((l2_entry >> s->csize_shift) &
1083 afdf0abe Kevin Wolf
                           s->csize_mask) + 1;
1084 afdf0abe Kevin Wolf
            l2_entry &= s->cluster_offset_mask;
1085 afdf0abe Kevin Wolf
            inc_refcounts(bs, res, refcount_table, refcount_table_size,
1086 afdf0abe Kevin Wolf
                l2_entry & ~511, nb_csectors * 512);
1087 fba31bae Stefan Hajnoczi
1088 fba31bae Stefan Hajnoczi
            if (flags & CHECK_FRAG_INFO) {
1089 fba31bae Stefan Hajnoczi
                res->bfi.allocated_clusters++;
1090 4db35162 Stefan Hajnoczi
                res->bfi.compressed_clusters++;
1091 fba31bae Stefan Hajnoczi
1092 fba31bae Stefan Hajnoczi
                /* Compressed clusters are fragmented by nature.  Since they
1093 fba31bae Stefan Hajnoczi
                 * take up sub-sector space but we only have sector granularity
1094 fba31bae Stefan Hajnoczi
                 * I/O we need to re-read the same sectors even for adjacent
1095 fba31bae Stefan Hajnoczi
                 * compressed clusters.
1096 fba31bae Stefan Hajnoczi
                 */
1097 fba31bae Stefan Hajnoczi
                res->bfi.fragmented_clusters++;
1098 fba31bae Stefan Hajnoczi
            }
1099 afdf0abe Kevin Wolf
            break;
1100 f7d0fe02 Kevin Wolf
1101 6377af48 Kevin Wolf
        case QCOW2_CLUSTER_ZERO:
1102 6377af48 Kevin Wolf
            if ((l2_entry & L2E_OFFSET_MASK) == 0) {
1103 6377af48 Kevin Wolf
                break;
1104 6377af48 Kevin Wolf
            }
1105 6377af48 Kevin Wolf
            /* fall through */
1106 6377af48 Kevin Wolf
1107 afdf0abe Kevin Wolf
        case QCOW2_CLUSTER_NORMAL:
1108 afdf0abe Kevin Wolf
        {
1109 afdf0abe Kevin Wolf
            uint64_t offset = l2_entry & L2E_OFFSET_MASK;
1110 f7d0fe02 Kevin Wolf
1111 fba31bae Stefan Hajnoczi
            if (flags & CHECK_FRAG_INFO) {
1112 fba31bae Stefan Hajnoczi
                res->bfi.allocated_clusters++;
1113 fba31bae Stefan Hajnoczi
                if (next_contiguous_offset &&
1114 fba31bae Stefan Hajnoczi
                    offset != next_contiguous_offset) {
1115 fba31bae Stefan Hajnoczi
                    res->bfi.fragmented_clusters++;
1116 fba31bae Stefan Hajnoczi
                }
1117 fba31bae Stefan Hajnoczi
                next_contiguous_offset = offset + s->cluster_size;
1118 fba31bae Stefan Hajnoczi
            }
1119 fba31bae Stefan Hajnoczi
1120 afdf0abe Kevin Wolf
            /* Mark cluster as used */
1121 afdf0abe Kevin Wolf
            inc_refcounts(bs, res, refcount_table,refcount_table_size,
1122 afdf0abe Kevin Wolf
                offset, s->cluster_size);
1123 afdf0abe Kevin Wolf
1124 afdf0abe Kevin Wolf
            /* Correct offsets are cluster aligned */
1125 afdf0abe Kevin Wolf
            if (offset & (s->cluster_size - 1)) {
1126 afdf0abe Kevin Wolf
                fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
1127 afdf0abe Kevin Wolf
                    "properly aligned; L2 entry corrupted.\n", offset);
1128 afdf0abe Kevin Wolf
                res->corruptions++;
1129 afdf0abe Kevin Wolf
            }
1130 afdf0abe Kevin Wolf
            break;
1131 afdf0abe Kevin Wolf
        }
1132 afdf0abe Kevin Wolf
1133 afdf0abe Kevin Wolf
        case QCOW2_CLUSTER_UNALLOCATED:
1134 afdf0abe Kevin Wolf
            break;
1135 afdf0abe Kevin Wolf
1136 afdf0abe Kevin Wolf
        default:
1137 afdf0abe Kevin Wolf
            abort();
1138 f7d0fe02 Kevin Wolf
        }
1139 f7d0fe02 Kevin Wolf
    }
1140 f7d0fe02 Kevin Wolf
1141 7267c094 Anthony Liguori
    g_free(l2_table);
1142 9ac228e0 Kevin Wolf
    return 0;
1143 f7d0fe02 Kevin Wolf
1144 f7d0fe02 Kevin Wolf
fail:
1145 9ac228e0 Kevin Wolf
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
1146 7267c094 Anthony Liguori
    g_free(l2_table);
1147 f7d0fe02 Kevin Wolf
    return -EIO;
1148 f7d0fe02 Kevin Wolf
}
1149 f7d0fe02 Kevin Wolf
1150 f7d0fe02 Kevin Wolf
/*
1151 f7d0fe02 Kevin Wolf
 * Increases the refcount for the L1 table, its L2 tables and all referenced
1152 f7d0fe02 Kevin Wolf
 * clusters in the given refcount table. While doing so, performs some checks
1153 f7d0fe02 Kevin Wolf
 * on L1 and L2 entries.
1154 f7d0fe02 Kevin Wolf
 *
1155 f7d0fe02 Kevin Wolf
 * Returns the number of errors found by the checks or -errno if an internal
1156 f7d0fe02 Kevin Wolf
 * error occurred.
1157 f7d0fe02 Kevin Wolf
 */
1158 f7d0fe02 Kevin Wolf
static int check_refcounts_l1(BlockDriverState *bs,
1159 9ac228e0 Kevin Wolf
                              BdrvCheckResult *res,
1160 f7d0fe02 Kevin Wolf
                              uint16_t *refcount_table,
1161 f7d0fe02 Kevin Wolf
                              int refcount_table_size,
1162 f7d0fe02 Kevin Wolf
                              int64_t l1_table_offset, int l1_size,
1163 801f7044 Stefan Hajnoczi
                              int flags)
1164 f7d0fe02 Kevin Wolf
{
1165 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
1166 f7d0fe02 Kevin Wolf
    uint64_t *l1_table, l2_offset, l1_size2;
1167 4f6ed88c Max Reitz
    int i, ret;
1168 f7d0fe02 Kevin Wolf
1169 f7d0fe02 Kevin Wolf
    l1_size2 = l1_size * sizeof(uint64_t);
1170 f7d0fe02 Kevin Wolf
1171 f7d0fe02 Kevin Wolf
    /* Mark L1 table as used */
1172 9ac228e0 Kevin Wolf
    inc_refcounts(bs, res, refcount_table, refcount_table_size,
1173 9ac228e0 Kevin Wolf
        l1_table_offset, l1_size2);
1174 f7d0fe02 Kevin Wolf
1175 f7d0fe02 Kevin Wolf
    /* Read L1 table entries from disk */
1176 702ef63f Kevin Wolf
    if (l1_size2 == 0) {
1177 702ef63f Kevin Wolf
        l1_table = NULL;
1178 702ef63f Kevin Wolf
    } else {
1179 7267c094 Anthony Liguori
        l1_table = g_malloc(l1_size2);
1180 66f82cee Kevin Wolf
        if (bdrv_pread(bs->file, l1_table_offset,
1181 702ef63f Kevin Wolf
                       l1_table, l1_size2) != l1_size2)
1182 702ef63f Kevin Wolf
            goto fail;
1183 702ef63f Kevin Wolf
        for(i = 0;i < l1_size; i++)
1184 702ef63f Kevin Wolf
            be64_to_cpus(&l1_table[i]);
1185 702ef63f Kevin Wolf
    }
1186 f7d0fe02 Kevin Wolf
1187 f7d0fe02 Kevin Wolf
    /* Do the actual checks */
1188 f7d0fe02 Kevin Wolf
    for(i = 0; i < l1_size; i++) {
1189 f7d0fe02 Kevin Wolf
        l2_offset = l1_table[i];
1190 f7d0fe02 Kevin Wolf
        if (l2_offset) {
1191 f7d0fe02 Kevin Wolf
            /* Mark L2 table as used */
1192 afdf0abe Kevin Wolf
            l2_offset &= L1E_OFFSET_MASK;
1193 9ac228e0 Kevin Wolf
            inc_refcounts(bs, res, refcount_table, refcount_table_size,
1194 9ac228e0 Kevin Wolf
                l2_offset, s->cluster_size);
1195 f7d0fe02 Kevin Wolf
1196 f7d0fe02 Kevin Wolf
            /* L2 tables are cluster aligned */
1197 f7d0fe02 Kevin Wolf
            if (l2_offset & (s->cluster_size - 1)) {
1198 f7d0fe02 Kevin Wolf
                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
1199 f7d0fe02 Kevin Wolf
                    "cluster aligned; L1 entry corrupted\n", l2_offset);
1200 9ac228e0 Kevin Wolf
                res->corruptions++;
1201 f7d0fe02 Kevin Wolf
            }
1202 f7d0fe02 Kevin Wolf
1203 f7d0fe02 Kevin Wolf
            /* Process and check L2 entries */
1204 9ac228e0 Kevin Wolf
            ret = check_refcounts_l2(bs, res, refcount_table,
1205 801f7044 Stefan Hajnoczi
                                     refcount_table_size, l2_offset, flags);
1206 f7d0fe02 Kevin Wolf
            if (ret < 0) {
1207 f7d0fe02 Kevin Wolf
                goto fail;
1208 f7d0fe02 Kevin Wolf
            }
1209 f7d0fe02 Kevin Wolf
        }
1210 f7d0fe02 Kevin Wolf
    }
1211 7267c094 Anthony Liguori
    g_free(l1_table);
1212 9ac228e0 Kevin Wolf
    return 0;
1213 f7d0fe02 Kevin Wolf
1214 f7d0fe02 Kevin Wolf
fail:
1215 f7d0fe02 Kevin Wolf
    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
1216 9ac228e0 Kevin Wolf
    res->check_errors++;
1217 7267c094 Anthony Liguori
    g_free(l1_table);
1218 f7d0fe02 Kevin Wolf
    return -EIO;
1219 f7d0fe02 Kevin Wolf
}
1220 f7d0fe02 Kevin Wolf
1221 f7d0fe02 Kevin Wolf
/*
1222 4f6ed88c Max Reitz
 * Checks the OFLAG_COPIED flag for all L1 and L2 entries.
1223 4f6ed88c Max Reitz
 *
1224 4f6ed88c Max Reitz
 * This function does not print an error message nor does it increment
1225 4f6ed88c Max Reitz
 * check_errors if get_refcount fails (this is because such an error will have
1226 4f6ed88c Max Reitz
 * been already detected and sufficiently signaled by the calling function
1227 4f6ed88c Max Reitz
 * (qcow2_check_refcounts) by the time this function is called).
1228 4f6ed88c Max Reitz
 */
1229 e23e400e Max Reitz
static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
1230 e23e400e Max Reitz
                              BdrvCheckMode fix)
1231 4f6ed88c Max Reitz
{
1232 4f6ed88c Max Reitz
    BDRVQcowState *s = bs->opaque;
1233 4f6ed88c Max Reitz
    uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
1234 4f6ed88c Max Reitz
    int ret;
1235 4f6ed88c Max Reitz
    int refcount;
1236 4f6ed88c Max Reitz
    int i, j;
1237 4f6ed88c Max Reitz
1238 4f6ed88c Max Reitz
    for (i = 0; i < s->l1_size; i++) {
1239 4f6ed88c Max Reitz
        uint64_t l1_entry = s->l1_table[i];
1240 4f6ed88c Max Reitz
        uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
1241 e23e400e Max Reitz
        bool l2_dirty = false;
1242 4f6ed88c Max Reitz
1243 4f6ed88c Max Reitz
        if (!l2_offset) {
1244 4f6ed88c Max Reitz
            continue;
1245 4f6ed88c Max Reitz
        }
1246 4f6ed88c Max Reitz
1247 4f6ed88c Max Reitz
        refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
1248 4f6ed88c Max Reitz
        if (refcount < 0) {
1249 4f6ed88c Max Reitz
            /* don't print message nor increment check_errors */
1250 4f6ed88c Max Reitz
            continue;
1251 4f6ed88c Max Reitz
        }
1252 4f6ed88c Max Reitz
        if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
1253 e23e400e Max Reitz
            fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
1254 4f6ed88c Max Reitz
                    "l1_entry=%" PRIx64 " refcount=%d\n",
1255 e23e400e Max Reitz
                    fix & BDRV_FIX_ERRORS ? "Repairing" :
1256 e23e400e Max Reitz
                                            "ERROR",
1257 4f6ed88c Max Reitz
                    i, l1_entry, refcount);
1258 e23e400e Max Reitz
            if (fix & BDRV_FIX_ERRORS) {
1259 e23e400e Max Reitz
                s->l1_table[i] = refcount == 1
1260 e23e400e Max Reitz
                               ? l1_entry |  QCOW_OFLAG_COPIED
1261 e23e400e Max Reitz
                               : l1_entry & ~QCOW_OFLAG_COPIED;
1262 e23e400e Max Reitz
                ret = qcow2_write_l1_entry(bs, i);
1263 e23e400e Max Reitz
                if (ret < 0) {
1264 e23e400e Max Reitz
                    res->check_errors++;
1265 e23e400e Max Reitz
                    goto fail;
1266 e23e400e Max Reitz
                }
1267 e23e400e Max Reitz
                res->corruptions_fixed++;
1268 e23e400e Max Reitz
            } else {
1269 e23e400e Max Reitz
                res->corruptions++;
1270 e23e400e Max Reitz
            }
1271 4f6ed88c Max Reitz
        }
1272 4f6ed88c Max Reitz
1273 4f6ed88c Max Reitz
        ret = bdrv_pread(bs->file, l2_offset, l2_table,
1274 4f6ed88c Max Reitz
                         s->l2_size * sizeof(uint64_t));
1275 4f6ed88c Max Reitz
        if (ret < 0) {
1276 4f6ed88c Max Reitz
            fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
1277 4f6ed88c Max Reitz
                    strerror(-ret));
1278 4f6ed88c Max Reitz
            res->check_errors++;
1279 4f6ed88c Max Reitz
            goto fail;
1280 4f6ed88c Max Reitz
        }
1281 4f6ed88c Max Reitz
1282 4f6ed88c Max Reitz
        for (j = 0; j < s->l2_size; j++) {
1283 4f6ed88c Max Reitz
            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
1284 4f6ed88c Max Reitz
            uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
1285 4f6ed88c Max Reitz
            int cluster_type = qcow2_get_cluster_type(l2_entry);
1286 4f6ed88c Max Reitz
1287 4f6ed88c Max Reitz
            if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
1288 4f6ed88c Max Reitz
                ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
1289 4f6ed88c Max Reitz
                refcount = get_refcount(bs, data_offset >> s->cluster_bits);
1290 4f6ed88c Max Reitz
                if (refcount < 0) {
1291 4f6ed88c Max Reitz
                    /* don't print message nor increment check_errors */
1292 4f6ed88c Max Reitz
                    continue;
1293 4f6ed88c Max Reitz
                }
1294 4f6ed88c Max Reitz
                if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
1295 e23e400e Max Reitz
                    fprintf(stderr, "%s OFLAG_COPIED data cluster: "
1296 4f6ed88c Max Reitz
                            "l2_entry=%" PRIx64 " refcount=%d\n",
1297 e23e400e Max Reitz
                            fix & BDRV_FIX_ERRORS ? "Repairing" :
1298 e23e400e Max Reitz
                                                    "ERROR",
1299 4f6ed88c Max Reitz
                            l2_entry, refcount);
1300 e23e400e Max Reitz
                    if (fix & BDRV_FIX_ERRORS) {
1301 e23e400e Max Reitz
                        l2_table[j] = cpu_to_be64(refcount == 1
1302 e23e400e Max Reitz
                                    ? l2_entry |  QCOW_OFLAG_COPIED
1303 e23e400e Max Reitz
                                    : l2_entry & ~QCOW_OFLAG_COPIED);
1304 e23e400e Max Reitz
                        l2_dirty = true;
1305 e23e400e Max Reitz
                        res->corruptions_fixed++;
1306 e23e400e Max Reitz
                    } else {
1307 e23e400e Max Reitz
                        res->corruptions++;
1308 e23e400e Max Reitz
                    }
1309 4f6ed88c Max Reitz
                }
1310 4f6ed88c Max Reitz
            }
1311 4f6ed88c Max Reitz
        }
1312 e23e400e Max Reitz
1313 e23e400e Max Reitz
        if (l2_dirty) {
1314 231bb267 Max Reitz
            ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
1315 231bb267 Max Reitz
                                                l2_offset, s->cluster_size);
1316 e23e400e Max Reitz
            if (ret < 0) {
1317 e23e400e Max Reitz
                fprintf(stderr, "ERROR: Could not write L2 table; metadata "
1318 e23e400e Max Reitz
                        "overlap check failed: %s\n", strerror(-ret));
1319 e23e400e Max Reitz
                res->check_errors++;
1320 e23e400e Max Reitz
                goto fail;
1321 e23e400e Max Reitz
            }
1322 e23e400e Max Reitz
1323 e23e400e Max Reitz
            ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size);
1324 e23e400e Max Reitz
            if (ret < 0) {
1325 e23e400e Max Reitz
                fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
1326 e23e400e Max Reitz
                        strerror(-ret));
1327 e23e400e Max Reitz
                res->check_errors++;
1328 e23e400e Max Reitz
                goto fail;
1329 e23e400e Max Reitz
            }
1330 e23e400e Max Reitz
        }
1331 4f6ed88c Max Reitz
    }
1332 4f6ed88c Max Reitz
1333 4f6ed88c Max Reitz
    ret = 0;
1334 4f6ed88c Max Reitz
1335 4f6ed88c Max Reitz
fail:
1336 4f6ed88c Max Reitz
    qemu_vfree(l2_table);
1337 4f6ed88c Max Reitz
    return ret;
1338 4f6ed88c Max Reitz
}
1339 4f6ed88c Max Reitz
1340 4f6ed88c Max Reitz
/*
1341 afa50193 Max Reitz
 * Writes one sector of the refcount table to the disk
1342 afa50193 Max Reitz
 */
1343 afa50193 Max Reitz
#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
1344 afa50193 Max Reitz
static int write_reftable_entry(BlockDriverState *bs, int rt_index)
1345 afa50193 Max Reitz
{
1346 afa50193 Max Reitz
    BDRVQcowState *s = bs->opaque;
1347 afa50193 Max Reitz
    uint64_t buf[RT_ENTRIES_PER_SECTOR];
1348 afa50193 Max Reitz
    int rt_start_index;
1349 afa50193 Max Reitz
    int i, ret;
1350 afa50193 Max Reitz
1351 afa50193 Max Reitz
    rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
1352 afa50193 Max Reitz
    for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
1353 afa50193 Max Reitz
        buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
1354 afa50193 Max Reitz
    }
1355 afa50193 Max Reitz
1356 231bb267 Max Reitz
    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_TABLE,
1357 afa50193 Max Reitz
            s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
1358 afa50193 Max Reitz
            sizeof(buf));
1359 afa50193 Max Reitz
    if (ret < 0) {
1360 afa50193 Max Reitz
        return ret;
1361 afa50193 Max Reitz
    }
1362 afa50193 Max Reitz
1363 afa50193 Max Reitz
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
1364 afa50193 Max Reitz
    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
1365 afa50193 Max Reitz
            rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
1366 afa50193 Max Reitz
    if (ret < 0) {
1367 afa50193 Max Reitz
        return ret;
1368 afa50193 Max Reitz
    }
1369 afa50193 Max Reitz
1370 afa50193 Max Reitz
    return 0;
1371 afa50193 Max Reitz
}
1372 afa50193 Max Reitz
1373 afa50193 Max Reitz
/*
1374 afa50193 Max Reitz
 * Allocates a new cluster for the given refcount block (represented by its
1375 afa50193 Max Reitz
 * offset in the image file) and copies the current content there. This function
1376 afa50193 Max Reitz
 * does _not_ decrement the reference count for the currently occupied cluster.
1377 afa50193 Max Reitz
 *
1378 afa50193 Max Reitz
 * This function prints an informative message to stderr on error (and returns
1379 afa50193 Max Reitz
 * -errno); on success, 0 is returned.
1380 afa50193 Max Reitz
 */
1381 afa50193 Max Reitz
static int64_t realloc_refcount_block(BlockDriverState *bs, int reftable_index,
1382 afa50193 Max Reitz
                                      uint64_t offset)
1383 afa50193 Max Reitz
{
1384 afa50193 Max Reitz
    BDRVQcowState *s = bs->opaque;
1385 afa50193 Max Reitz
    int64_t new_offset = 0;
1386 afa50193 Max Reitz
    void *refcount_block = NULL;
1387 afa50193 Max Reitz
    int ret;
1388 afa50193 Max Reitz
1389 afa50193 Max Reitz
    /* allocate new refcount block */
1390 afa50193 Max Reitz
    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
1391 afa50193 Max Reitz
    if (new_offset < 0) {
1392 afa50193 Max Reitz
        fprintf(stderr, "Could not allocate new cluster: %s\n",
1393 afa50193 Max Reitz
                strerror(-new_offset));
1394 afa50193 Max Reitz
        ret = new_offset;
1395 afa50193 Max Reitz
        goto fail;
1396 afa50193 Max Reitz
    }
1397 afa50193 Max Reitz
1398 afa50193 Max Reitz
    /* fetch current refcount block content */
1399 afa50193 Max Reitz
    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, &refcount_block);
1400 afa50193 Max Reitz
    if (ret < 0) {
1401 afa50193 Max Reitz
        fprintf(stderr, "Could not fetch refcount block: %s\n", strerror(-ret));
1402 afa50193 Max Reitz
        goto fail;
1403 afa50193 Max Reitz
    }
1404 afa50193 Max Reitz
1405 afa50193 Max Reitz
    /* new block has not yet been entered into refcount table, therefore it is
1406 afa50193 Max Reitz
     * no refcount block yet (regarding this check) */
1407 231bb267 Max Reitz
    ret = qcow2_pre_write_overlap_check(bs, 0, new_offset, s->cluster_size);
1408 afa50193 Max Reitz
    if (ret < 0) {
1409 afa50193 Max Reitz
        fprintf(stderr, "Could not write refcount block; metadata overlap "
1410 afa50193 Max Reitz
                "check failed: %s\n", strerror(-ret));
1411 afa50193 Max Reitz
        /* the image will be marked corrupt, so don't even attempt on freeing
1412 afa50193 Max Reitz
         * the cluster */
1413 afa50193 Max Reitz
        new_offset = 0;
1414 afa50193 Max Reitz
        goto fail;
1415 afa50193 Max Reitz
    }
1416 afa50193 Max Reitz
1417 afa50193 Max Reitz
    /* write to new block */
1418 afa50193 Max Reitz
    ret = bdrv_write(bs->file, new_offset / BDRV_SECTOR_SIZE, refcount_block,
1419 afa50193 Max Reitz
            s->cluster_sectors);
1420 afa50193 Max Reitz
    if (ret < 0) {
1421 afa50193 Max Reitz
        fprintf(stderr, "Could not write refcount block: %s\n", strerror(-ret));
1422 afa50193 Max Reitz
        goto fail;
1423 afa50193 Max Reitz
    }
1424 afa50193 Max Reitz
1425 afa50193 Max Reitz
    /* update refcount table */
1426 afa50193 Max Reitz
    assert(!(new_offset & (s->cluster_size - 1)));
1427 afa50193 Max Reitz
    s->refcount_table[reftable_index] = new_offset;
1428 afa50193 Max Reitz
    ret = write_reftable_entry(bs, reftable_index);
1429 afa50193 Max Reitz
    if (ret < 0) {
1430 afa50193 Max Reitz
        fprintf(stderr, "Could not update refcount table: %s\n",
1431 afa50193 Max Reitz
                strerror(-ret));
1432 afa50193 Max Reitz
        goto fail;
1433 afa50193 Max Reitz
    }
1434 afa50193 Max Reitz
1435 afa50193 Max Reitz
fail:
1436 afa50193 Max Reitz
    if (new_offset && (ret < 0)) {
1437 afa50193 Max Reitz
        qcow2_free_clusters(bs, new_offset, s->cluster_size,
1438 afa50193 Max Reitz
                QCOW2_DISCARD_ALWAYS);
1439 afa50193 Max Reitz
    }
1440 afa50193 Max Reitz
    if (refcount_block) {
1441 afa50193 Max Reitz
        if (ret < 0) {
1442 afa50193 Max Reitz
            qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
1443 afa50193 Max Reitz
        } else {
1444 afa50193 Max Reitz
            ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
1445 afa50193 Max Reitz
        }
1446 afa50193 Max Reitz
    }
1447 afa50193 Max Reitz
    if (ret < 0) {
1448 afa50193 Max Reitz
        return ret;
1449 afa50193 Max Reitz
    }
1450 afa50193 Max Reitz
    return new_offset;
1451 afa50193 Max Reitz
}
1452 afa50193 Max Reitz
1453 afa50193 Max Reitz
/*
1454 f7d0fe02 Kevin Wolf
 * Checks an image for refcount consistency.
1455 f7d0fe02 Kevin Wolf
 *
1456 f7d0fe02 Kevin Wolf
 * Returns 0 if no errors are found, the number of errors in case the image is
1457 a1c7273b Stefan Weil
 * detected as corrupted, and -errno when an internal error occurred.
1458 f7d0fe02 Kevin Wolf
 */
1459 166acf54 Kevin Wolf
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
1460 166acf54 Kevin Wolf
                          BdrvCheckMode fix)
1461 f7d0fe02 Kevin Wolf
{
1462 f7d0fe02 Kevin Wolf
    BDRVQcowState *s = bs->opaque;
1463 c6bb9ad1 Federico Simoncelli
    int64_t size, i, highest_cluster;
1464 166acf54 Kevin Wolf
    int nb_clusters, refcount1, refcount2;
1465 f7d0fe02 Kevin Wolf
    QCowSnapshot *sn;
1466 f7d0fe02 Kevin Wolf
    uint16_t *refcount_table;
1467 9ac228e0 Kevin Wolf
    int ret;
1468 f7d0fe02 Kevin Wolf
1469 66f82cee Kevin Wolf
    size = bdrv_getlength(bs->file);
1470 f7d0fe02 Kevin Wolf
    nb_clusters = size_to_clusters(s, size);
1471 7267c094 Anthony Liguori
    refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
1472 f7d0fe02 Kevin Wolf
1473 c349ca4b Kevin Wolf
    res->bfi.total_clusters =
1474 c349ca4b Kevin Wolf
        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
1475 c349ca4b Kevin Wolf
1476 f7d0fe02 Kevin Wolf
    /* header */
1477 9ac228e0 Kevin Wolf
    inc_refcounts(bs, res, refcount_table, nb_clusters,
1478 9ac228e0 Kevin Wolf
        0, s->cluster_size);
1479 f7d0fe02 Kevin Wolf
1480 f7d0fe02 Kevin Wolf
    /* current L1 table */
1481 9ac228e0 Kevin Wolf
    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
1482 db074901 Max Reitz
                             s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO);
1483 f7d0fe02 Kevin Wolf
    if (ret < 0) {
1484 80fa3341 Kevin Wolf
        goto fail;
1485 f7d0fe02 Kevin Wolf
    }
1486 f7d0fe02 Kevin Wolf
1487 f7d0fe02 Kevin Wolf
    /* snapshots */
1488 f7d0fe02 Kevin Wolf
    for(i = 0; i < s->nb_snapshots; i++) {
1489 f7d0fe02 Kevin Wolf
        sn = s->snapshots + i;
1490 9ac228e0 Kevin Wolf
        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
1491 9ac228e0 Kevin Wolf
            sn->l1_table_offset, sn->l1_size, 0);
1492 9ac228e0 Kevin Wolf
        if (ret < 0) {
1493 80fa3341 Kevin Wolf
            goto fail;
1494 9ac228e0 Kevin Wolf
        }
1495 f7d0fe02 Kevin Wolf
    }
1496 9ac228e0 Kevin Wolf
    inc_refcounts(bs, res, refcount_table, nb_clusters,
1497 9ac228e0 Kevin Wolf
        s->snapshots_offset, s->snapshots_size);
1498 f7d0fe02 Kevin Wolf
1499 f7d0fe02 Kevin Wolf
    /* refcount data */
1500 9ac228e0 Kevin Wolf
    inc_refcounts(bs, res, refcount_table, nb_clusters,
1501 9ac228e0 Kevin Wolf
        s->refcount_table_offset,
1502 9ac228e0 Kevin Wolf
        s->refcount_table_size * sizeof(uint64_t));
1503 9ac228e0 Kevin Wolf
1504 f7d0fe02 Kevin Wolf
    for(i = 0; i < s->refcount_table_size; i++) {
1505 6882c8fa Kevin Wolf
        uint64_t offset, cluster;
1506 f7d0fe02 Kevin Wolf
        offset = s->refcount_table[i];
1507 6882c8fa Kevin Wolf
        cluster = offset >> s->cluster_bits;
1508 746c3cb5 Kevin Wolf
1509 746c3cb5 Kevin Wolf
        /* Refcount blocks are cluster aligned */
1510 746c3cb5 Kevin Wolf
        if (offset & (s->cluster_size - 1)) {
1511 166acf54 Kevin Wolf
            fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
1512 746c3cb5 Kevin Wolf
                "cluster aligned; refcount table entry corrupted\n", i);
1513 9ac228e0 Kevin Wolf
            res->corruptions++;
1514 6882c8fa Kevin Wolf
            continue;
1515 6882c8fa Kevin Wolf
        }
1516 6882c8fa Kevin Wolf
1517 6882c8fa Kevin Wolf
        if (cluster >= nb_clusters) {
1518 166acf54 Kevin Wolf
            fprintf(stderr, "ERROR refcount block %" PRId64
1519 166acf54 Kevin Wolf
                    " is outside image\n", i);
1520 9ac228e0 Kevin Wolf
            res->corruptions++;
1521 6882c8fa Kevin Wolf
            continue;
1522 746c3cb5 Kevin Wolf
        }
1523 746c3cb5 Kevin Wolf
1524 f7d0fe02 Kevin Wolf
        if (offset != 0) {
1525 9ac228e0 Kevin Wolf
            inc_refcounts(bs, res, refcount_table, nb_clusters,
1526 9ac228e0 Kevin Wolf
                offset, s->cluster_size);
1527 6882c8fa Kevin Wolf
            if (refcount_table[cluster] != 1) {
1528 afa50193 Max Reitz
                fprintf(stderr, "%s refcount block %" PRId64
1529 166acf54 Kevin Wolf
                    " refcount=%d\n",
1530 afa50193 Max Reitz
                    fix & BDRV_FIX_ERRORS ? "Repairing" :
1531 afa50193 Max Reitz
                                            "ERROR",
1532 6882c8fa Kevin Wolf
                    i, refcount_table[cluster]);
1533 afa50193 Max Reitz
1534 afa50193 Max Reitz
                if (fix & BDRV_FIX_ERRORS) {
1535 afa50193 Max Reitz
                    int64_t new_offset;
1536 afa50193 Max Reitz
1537 afa50193 Max Reitz
                    new_offset = realloc_refcount_block(bs, i, offset);
1538 afa50193 Max Reitz
                    if (new_offset < 0) {
1539 afa50193 Max Reitz
                        res->corruptions++;
1540 afa50193 Max Reitz
                        continue;
1541 afa50193 Max Reitz
                    }
1542 afa50193 Max Reitz
1543 afa50193 Max Reitz
                    /* update refcounts */
1544 afa50193 Max Reitz
                    if ((new_offset >> s->cluster_bits) >= nb_clusters) {
1545 afa50193 Max Reitz
                        /* increase refcount_table size if necessary */
1546 afa50193 Max Reitz
                        int old_nb_clusters = nb_clusters;
1547 afa50193 Max Reitz
                        nb_clusters = (new_offset >> s->cluster_bits) + 1;
1548 afa50193 Max Reitz
                        refcount_table = g_realloc(refcount_table,
1549 afa50193 Max Reitz
                                nb_clusters * sizeof(uint16_t));
1550 afa50193 Max Reitz
                        memset(&refcount_table[old_nb_clusters], 0, (nb_clusters
1551 afa50193 Max Reitz
                                - old_nb_clusters) * sizeof(uint16_t));
1552 afa50193 Max Reitz
                    }
1553 afa50193 Max Reitz
                    refcount_table[cluster]--;
1554 afa50193 Max Reitz
                    inc_refcounts(bs, res, refcount_table, nb_clusters,
1555 afa50193 Max Reitz
                            new_offset, s->cluster_size);
1556 afa50193 Max Reitz
1557 afa50193 Max Reitz
                    res->corruptions_fixed++;
1558 afa50193 Max Reitz
                } else {
1559 afa50193 Max Reitz
                    res->corruptions++;
1560 afa50193 Max Reitz
                }
1561 746c3cb5 Kevin Wolf
            }
1562 f7d0fe02 Kevin Wolf
        }
1563 f7d0fe02 Kevin Wolf
    }
1564 f7d0fe02 Kevin Wolf
1565 f7d0fe02 Kevin Wolf
    /* compare ref counts */
1566 c6bb9ad1 Federico Simoncelli
    for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
1567 f7d0fe02 Kevin Wolf
        refcount1 = get_refcount(bs, i);
1568 018faafd Kevin Wolf
        if (refcount1 < 0) {
1569 166acf54 Kevin Wolf
            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
1570 018faafd Kevin Wolf
                i, strerror(-refcount1));
1571 9ac228e0 Kevin Wolf
            res->check_errors++;
1572 f74550fd Kevin Wolf
            continue;
1573 018faafd Kevin Wolf
        }
1574 018faafd Kevin Wolf
1575 f7d0fe02 Kevin Wolf
        refcount2 = refcount_table[i];
1576 c6bb9ad1 Federico Simoncelli
1577 c6bb9ad1 Federico Simoncelli
        if (refcount1 > 0 || refcount2 > 0) {
1578 c6bb9ad1 Federico Simoncelli
            highest_cluster = i;
1579 c6bb9ad1 Federico Simoncelli
        }
1580 c6bb9ad1 Federico Simoncelli
1581 f7d0fe02 Kevin Wolf
        if (refcount1 != refcount2) {
1582 166acf54 Kevin Wolf
1583 166acf54 Kevin Wolf
            /* Check if we're allowed to fix the mismatch */
1584 166acf54 Kevin Wolf
            int *num_fixed = NULL;
1585 166acf54 Kevin Wolf
            if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
1586 166acf54 Kevin Wolf
                num_fixed = &res->leaks_fixed;
1587 166acf54 Kevin Wolf
            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
1588 166acf54 Kevin Wolf
                num_fixed = &res->corruptions_fixed;
1589 166acf54 Kevin Wolf
            }
1590 166acf54 Kevin Wolf
1591 166acf54 Kevin Wolf
            fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
1592 166acf54 Kevin Wolf
                   num_fixed != NULL     ? "Repairing" :
1593 166acf54 Kevin Wolf
                   refcount1 < refcount2 ? "ERROR" :
1594 166acf54 Kevin Wolf
                                           "Leaked",
1595 f7d0fe02 Kevin Wolf
                   i, refcount1, refcount2);
1596 166acf54 Kevin Wolf
1597 166acf54 Kevin Wolf
            if (num_fixed) {
1598 166acf54 Kevin Wolf
                ret = update_refcount(bs, i << s->cluster_bits, 1,
1599 6cfcb9b8 Kevin Wolf
                                      refcount2 - refcount1,
1600 6cfcb9b8 Kevin Wolf
                                      QCOW2_DISCARD_ALWAYS);
1601 166acf54 Kevin Wolf
                if (ret >= 0) {
1602 166acf54 Kevin Wolf
                    (*num_fixed)++;
1603 166acf54 Kevin Wolf
                    continue;
1604 166acf54 Kevin Wolf
                }
1605 166acf54 Kevin Wolf
            }
1606 166acf54 Kevin Wolf
1607 166acf54 Kevin Wolf
            /* And if we couldn't, print an error */
1608 9ac228e0 Kevin Wolf
            if (refcount1 < refcount2) {
1609 9ac228e0 Kevin Wolf
                res->corruptions++;
1610 9ac228e0 Kevin Wolf
            } else {
1611 9ac228e0 Kevin Wolf
                res->leaks++;
1612 9ac228e0 Kevin Wolf
            }
1613 f7d0fe02 Kevin Wolf
        }
1614 f7d0fe02 Kevin Wolf
    }
1615 f7d0fe02 Kevin Wolf
1616 4f6ed88c Max Reitz
    /* check OFLAG_COPIED */
1617 e23e400e Max Reitz
    ret = check_oflag_copied(bs, res, fix);
1618 4f6ed88c Max Reitz
    if (ret < 0) {
1619 4f6ed88c Max Reitz
        goto fail;
1620 4f6ed88c Max Reitz
    }
1621 4f6ed88c Max Reitz
1622 c6bb9ad1 Federico Simoncelli
    res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
1623 80fa3341 Kevin Wolf
    ret = 0;
1624 80fa3341 Kevin Wolf
1625 80fa3341 Kevin Wolf
fail:
1626 7267c094 Anthony Liguori
    g_free(refcount_table);
1627 f7d0fe02 Kevin Wolf
1628 80fa3341 Kevin Wolf
    return ret;
1629 f7d0fe02 Kevin Wolf
}
1630 f7d0fe02 Kevin Wolf
1631 a40f1c2a Max Reitz
/*
 * Shorthand for the overlap tests below: forwards the variables named
 * "offset" and "size" that are in scope at the point of expansion, together
 * with the given section start and length, to ranges_overlap().
 */
#define overlaps_with(start, len) ranges_overlap(offset, size, start, len)
1633 a40f1c2a Max Reitz
1634 a40f1c2a Max Reitz
/*
1635 a40f1c2a Max Reitz
 * Checks if the given offset into the image file is actually free to use by
1636 a40f1c2a Max Reitz
 * looking for overlaps with important metadata sections (L1/L2 tables etc.),
1637 a40f1c2a Max Reitz
 * i.e. a sanity check without relying on the refcount tables.
1638 a40f1c2a Max Reitz
 *
1639 231bb267 Max Reitz
 * The ign parameter specifies what checks not to perform (being a bitmask of
1640 231bb267 Max Reitz
 * QCow2MetadataOverlap values), i.e., what sections to ignore.
1641 a40f1c2a Max Reitz
 *
1642 a40f1c2a Max Reitz
 * Returns:
1643 a40f1c2a Max Reitz
 * - 0 if writing to this offset will not affect the mentioned metadata
1644 a40f1c2a Max Reitz
 * - a positive QCow2MetadataOverlap value indicating one overlapping section
1645 a40f1c2a Max Reitz
 * - a negative value (-errno) indicating an error while performing a check,
1646 a40f1c2a Max Reitz
 *   e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
1647 a40f1c2a Max Reitz
 */
1648 231bb267 Max Reitz
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
1649 a40f1c2a Max Reitz
                                 int64_t size)
1650 a40f1c2a Max Reitz
{
1651 a40f1c2a Max Reitz
    BDRVQcowState *s = bs->opaque;
1652 3e355390 Max Reitz
    int chk = s->overlap_check & ~ign;
1653 a40f1c2a Max Reitz
    int i, j;
1654 a40f1c2a Max Reitz
1655 a40f1c2a Max Reitz
    if (!size) {
1656 a40f1c2a Max Reitz
        return 0;
1657 a40f1c2a Max Reitz
    }
1658 a40f1c2a Max Reitz
1659 a40f1c2a Max Reitz
    if (chk & QCOW2_OL_MAIN_HEADER) {
1660 a40f1c2a Max Reitz
        if (offset < s->cluster_size) {
1661 a40f1c2a Max Reitz
            return QCOW2_OL_MAIN_HEADER;
1662 a40f1c2a Max Reitz
        }
1663 a40f1c2a Max Reitz
    }
1664 a40f1c2a Max Reitz
1665 a40f1c2a Max Reitz
    /* align range to test to cluster boundaries */
1666 a40f1c2a Max Reitz
    size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size);
1667 a40f1c2a Max Reitz
    offset = start_of_cluster(s, offset);
1668 a40f1c2a Max Reitz
1669 a40f1c2a Max Reitz
    if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
1670 a40f1c2a Max Reitz
        if (overlaps_with(s->l1_table_offset, s->l1_size * sizeof(uint64_t))) {
1671 a40f1c2a Max Reitz
            return QCOW2_OL_ACTIVE_L1;
1672 a40f1c2a Max Reitz
        }
1673 a40f1c2a Max Reitz
    }
1674 a40f1c2a Max Reitz
1675 a40f1c2a Max Reitz
    if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
1676 a40f1c2a Max Reitz
        if (overlaps_with(s->refcount_table_offset,
1677 a40f1c2a Max Reitz
            s->refcount_table_size * sizeof(uint64_t))) {
1678 a40f1c2a Max Reitz
            return QCOW2_OL_REFCOUNT_TABLE;
1679 a40f1c2a Max Reitz
        }
1680 a40f1c2a Max Reitz
    }
1681 a40f1c2a Max Reitz
1682 a40f1c2a Max Reitz
    if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
1683 a40f1c2a Max Reitz
        if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
1684 a40f1c2a Max Reitz
            return QCOW2_OL_SNAPSHOT_TABLE;
1685 a40f1c2a Max Reitz
        }
1686 a40f1c2a Max Reitz
    }
1687 a40f1c2a Max Reitz
1688 a40f1c2a Max Reitz
    if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
1689 a40f1c2a Max Reitz
        for (i = 0; i < s->nb_snapshots; i++) {
1690 a40f1c2a Max Reitz
            if (s->snapshots[i].l1_size &&
1691 a40f1c2a Max Reitz
                overlaps_with(s->snapshots[i].l1_table_offset,
1692 a40f1c2a Max Reitz
                s->snapshots[i].l1_size * sizeof(uint64_t))) {
1693 a40f1c2a Max Reitz
                return QCOW2_OL_INACTIVE_L1;
1694 a40f1c2a Max Reitz
            }
1695 a40f1c2a Max Reitz
        }
1696 a40f1c2a Max Reitz
    }
1697 a40f1c2a Max Reitz
1698 a40f1c2a Max Reitz
    if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
1699 a40f1c2a Max Reitz
        for (i = 0; i < s->l1_size; i++) {
1700 a40f1c2a Max Reitz
            if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
1701 a40f1c2a Max Reitz
                overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
1702 a40f1c2a Max Reitz
                s->cluster_size)) {
1703 a40f1c2a Max Reitz
                return QCOW2_OL_ACTIVE_L2;
1704 a40f1c2a Max Reitz
            }
1705 a40f1c2a Max Reitz
        }
1706 a40f1c2a Max Reitz
    }
1707 a40f1c2a Max Reitz
1708 a40f1c2a Max Reitz
    if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
1709 a40f1c2a Max Reitz
        for (i = 0; i < s->refcount_table_size; i++) {
1710 a40f1c2a Max Reitz
            if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
1711 a40f1c2a Max Reitz
                overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
1712 a40f1c2a Max Reitz
                s->cluster_size)) {
1713 a40f1c2a Max Reitz
                return QCOW2_OL_REFCOUNT_BLOCK;
1714 a40f1c2a Max Reitz
            }
1715 a40f1c2a Max Reitz
        }
1716 a40f1c2a Max Reitz
    }
1717 a40f1c2a Max Reitz
1718 a40f1c2a Max Reitz
    if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
1719 a40f1c2a Max Reitz
        for (i = 0; i < s->nb_snapshots; i++) {
1720 a40f1c2a Max Reitz
            uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
1721 a40f1c2a Max Reitz
            uint32_t l1_sz  = s->snapshots[i].l1_size;
1722 998b959c Max Reitz
            uint64_t l1_sz2 = l1_sz * sizeof(uint64_t);
1723 998b959c Max Reitz
            uint64_t *l1 = g_malloc(l1_sz2);
1724 a40f1c2a Max Reitz
            int ret;
1725 a40f1c2a Max Reitz
1726 998b959c Max Reitz
            ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
1727 a40f1c2a Max Reitz
            if (ret < 0) {
1728 a40f1c2a Max Reitz
                g_free(l1);
1729 a40f1c2a Max Reitz
                return ret;
1730 a40f1c2a Max Reitz
            }
1731 a40f1c2a Max Reitz
1732 a40f1c2a Max Reitz
            for (j = 0; j < l1_sz; j++) {
1733 1e242b55 Max Reitz
                uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK;
1734 1e242b55 Max Reitz
                if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) {
1735 a40f1c2a Max Reitz
                    g_free(l1);
1736 a40f1c2a Max Reitz
                    return QCOW2_OL_INACTIVE_L2;
1737 a40f1c2a Max Reitz
                }
1738 a40f1c2a Max Reitz
            }
1739 a40f1c2a Max Reitz
1740 a40f1c2a Max Reitz
            g_free(l1);
1741 a40f1c2a Max Reitz
        }
1742 a40f1c2a Max Reitz
    }
1743 a40f1c2a Max Reitz
1744 a40f1c2a Max Reitz
    return 0;
1745 a40f1c2a Max Reitz
}
1746 a40f1c2a Max Reitz
1747 a40f1c2a Max Reitz
static const char *metadata_ol_names[] = {
1748 a40f1c2a Max Reitz
    [QCOW2_OL_MAIN_HEADER_BITNR]    = "qcow2_header",
1749 a40f1c2a Max Reitz
    [QCOW2_OL_ACTIVE_L1_BITNR]      = "active L1 table",
1750 a40f1c2a Max Reitz
    [QCOW2_OL_ACTIVE_L2_BITNR]      = "active L2 table",
1751 a40f1c2a Max Reitz
    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
1752 a40f1c2a Max Reitz
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
1753 a40f1c2a Max Reitz
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
1754 a40f1c2a Max Reitz
    [QCOW2_OL_INACTIVE_L1_BITNR]    = "inactive L1 table",
1755 a40f1c2a Max Reitz
    [QCOW2_OL_INACTIVE_L2_BITNR]    = "inactive L2 table",
1756 a40f1c2a Max Reitz
};
1757 a40f1c2a Max Reitz
1758 a40f1c2a Max Reitz
/*
1759 a40f1c2a Max Reitz
 * First performs a check for metadata overlaps (through
1760 a40f1c2a Max Reitz
 * qcow2_check_metadata_overlap); if that fails with a negative value (error
1761 a40f1c2a Max Reitz
 * while performing a check), that value is returned. If an impending overlap
1762 a40f1c2a Max Reitz
 * is detected, the BDS will be made unusable, the qcow2 file marked corrupt
1763 a40f1c2a Max Reitz
 * and -EIO returned.
1764 a40f1c2a Max Reitz
 *
1765 a40f1c2a Max Reitz
 * Returns 0 if there were neither overlaps nor errors while checking for
1766 a40f1c2a Max Reitz
 * overlaps; or a negative value (-errno) on error.
1767 a40f1c2a Max Reitz
 */
1768 231bb267 Max Reitz
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
1769 a40f1c2a Max Reitz
                                  int64_t size)
1770 a40f1c2a Max Reitz
{
1771 231bb267 Max Reitz
    int ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
1772 a40f1c2a Max Reitz
1773 a40f1c2a Max Reitz
    if (ret < 0) {
1774 a40f1c2a Max Reitz
        return ret;
1775 a40f1c2a Max Reitz
    } else if (ret > 0) {
1776 a40f1c2a Max Reitz
        int metadata_ol_bitnr = ffs(ret) - 1;
1777 a40f1c2a Max Reitz
        char *message;
1778 a40f1c2a Max Reitz
        QObject *data;
1779 a40f1c2a Max Reitz
1780 a40f1c2a Max Reitz
        assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
1781 a40f1c2a Max Reitz
1782 a40f1c2a Max Reitz
        fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps "
1783 a40f1c2a Max Reitz
                "with %s); image marked as corrupt.\n",
1784 a40f1c2a Max Reitz
                metadata_ol_names[metadata_ol_bitnr]);
1785 a40f1c2a Max Reitz
        message = g_strdup_printf("Prevented %s overwrite",
1786 a40f1c2a Max Reitz
                metadata_ol_names[metadata_ol_bitnr]);
1787 a40f1c2a Max Reitz
        data = qobject_from_jsonf("{ 'device': %s, 'msg': %s, 'offset': %"
1788 a40f1c2a Max Reitz
                PRId64 ", 'size': %" PRId64 " }", bs->device_name, message,
1789 a40f1c2a Max Reitz
                offset, size);
1790 a40f1c2a Max Reitz
        monitor_protocol_event(QEVENT_BLOCK_IMAGE_CORRUPTED, data);
1791 a40f1c2a Max Reitz
        g_free(message);
1792 a40f1c2a Max Reitz
        qobject_decref(data);
1793 a40f1c2a Max Reitz
1794 a40f1c2a Max Reitz
        qcow2_mark_corrupt(bs);
1795 a40f1c2a Max Reitz
        bs->drv = NULL; /* make BDS unusable */
1796 a40f1c2a Max Reitz
        return -EIO;
1797 a40f1c2a Max Reitz
    }
1798 a40f1c2a Max Reitz
1799 a40f1c2a Max Reitz
    return 0;
1800 a40f1c2a Max Reitz
}