root / block / mirror.c @ 737e150e

/*
 * Image mirroring
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Paolo Bonzini  <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "qemu/ratelimit.h"

enum {
    /*
     * Size of data buffer for populating the image file.  This should be large
     * enough to process multiple clusters in a single call, so that populating
     * contiguous regions of the image is efficient.
     */
    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
};

#define SLICE_TIME 100000000ULL /* ns (100 ms) */

typedef struct MirrorBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *target;
    MirrorSyncMode mode;
    BlockdevOnError on_source_error, on_target_error;
    bool synced;            /* target has caught up with the source */
    bool should_complete;   /* completion requested via block-job-complete */
    int64_t sector_num;     /* position of the dirty-bitmap scan */
    uint8_t *buf;           /* bounce buffer for copying chunks */
} MirrorBlockJob;
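
/* Map an I/O error to a BlockErrorAction, following the policy that was
 * requested for the failing side (on-source-error for reads, on-target-error
 * for writes).  Any error means the target can no longer be assumed to be
 * in sync with the source.
 */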
static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                            int error)
{
    s->synced = false;
    if (read) {
        return block_job_error_action(&s->common, s->common.bs,
                                      s->on_source_error, true, error);
    } else {
        return block_job_error_action(&s->common, s->target,
                                      s->on_target_error, false, error);
    }
}
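
/* Copy a single dirty chunk from the source to the target.  On failure the
 * chosen error action is returned through @p_action and the chunk is marked
 * dirty again so that it is retried later.
 */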
static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
                                         BlockErrorAction *p_action)
{
    BlockDriverState *source = s->common.bs;
    BlockDriverState *target = s->target;
    QEMUIOVector qiov;
    int ret, nb_sectors;
    int64_t end;
    struct iovec iov;

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
    nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
    bdrv_reset_dirty(source, s->sector_num, nb_sectors);
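
    /* The chunk's dirty bits were cleared above, before the copy starts: if
     * the guest writes to this chunk while the request is in flight, the
     * chunk is marked dirty again and will be copied once more by a later
     * iteration.
     */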

    /* Copy the dirty cluster.  */
    iov.iov_base = s->buf;
    iov.iov_len  = nb_sectors * 512;
    qemu_iovec_init_external(&qiov, &iov, 1);

    trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
    ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
    if (ret < 0) {
        *p_action = mirror_error_action(s, true, -ret);
        goto fail;
    }
    ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
    if (ret < 0) {
        *p_action = mirror_error_action(s, false, -ret);
        s->synced = false;
        goto fail;
    }
    return 0;

fail:
    /* Try again later.  */
    bdrv_set_dirty(source, s->sector_num, nb_sectors);
    return ret;
}
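
/* Main coroutine of the mirror job.  It first seeds the dirty bitmap from
 * the allocation map (unless sync mode is "none"), then repeatedly copies
 * dirty chunks to the target until the two converge and the job is either
 * completed or cancelled.
 */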
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end;
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, BLOCK_SIZE);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            /* Round up to the next dirty-chunk boundary.  */
            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }
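
    /* Second part: keep copying dirty chunks to the target until the two
     * devices converge, throttling and publishing progress along the way.
     */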
    s->sector_num = -1;
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        cnt = bdrv_get_dirty_count(bs);
        if (cnt != 0) {
            BlockErrorAction action = BDRV_ACTION_REPORT;
            ret = mirror_iteration(s, &action);
            if (ret < 0 && action == BDRV_ACTION_REPORT) {
                goto immediate_exit;
            }
            cnt = bdrv_get_dirty_count(bs);
        }

        should_complete = false;
        if (cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_run runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
            } else {
                delay_ns = 0;
            }

            /* Note that even when no rate limit is applied we need to yield
             * with no pending I/O here so that bdrv_drain_all() returns.
             */
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
    }
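
/* Common exit path.  When the job completes successfully after a completion
 * request, the target is reopened with the source's flags if they differ and
 * then swapped into the source's place in the device graph; in all cases the
 * target is closed and the job's completion callback is invoked.
 */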
immediate_exit:
    g_free(s->buf);
    bdrv_set_dirty_tracking(bs, false);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_delete(s->target);
    block_job_completed(&s->common, ret);
}
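
/* Callback for block-job-set-speed.  @speed is in bytes per second; it is
 * converted to sectors and handed to the rate limiter, which meters each
 * SLICE_TIME slice of the copy loop.
 */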
static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    if (speed < 0) {
        error_set(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}

static void mirror_iostatus_reset(BlockJob *job)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    bdrv_iostatus_reset(s->target);
}
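
/* Callback for block-job-complete.  Completion requires that the target's
 * backing file, if any, can be opened, and that the job has already reported
 * itself ready; mirror_run then finishes on a following iteration of its
 * main loop.
 */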
static void mirror_complete(BlockJob *job, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    int ret;

    ret = bdrv_open_backing_file(s->target);
    if (ret < 0) {
        char backing_filename[PATH_MAX];
        bdrv_get_full_backing_filename(s->target, backing_filename,
                                       sizeof(backing_filename));
        error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
        return;
    }
    if (!s->synced) {
        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
        return;
    }

    s->should_complete = true;
    block_job_resume(job);
}

static BlockJobType mirror_job_type = {
    .instance_size  = sizeof(MirrorBlockJob),
    .job_type       = "mirror",
    .set_speed      = mirror_set_speed,
    .iostatus_reset = mirror_iostatus_reset,
    .complete       = mirror_complete,
};
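
/* Create and start a mirror job that copies the contents of @bs onto
 * @target.  @mode chooses how much is copied up front (the whole device,
 * only the topmost image, or nothing); writes issued by the guest while the
 * job runs are tracked in the dirty bitmap and mirrored as well.  @cb is
 * invoked when the job terminates.
 */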
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
{
    MirrorBlockJob *s;

    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
        !bdrv_iostatus_is_enabled(bs)) {
        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
        return;
    }

    s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
    }

    s->on_source_error = on_source_error;
    s->on_target_error = on_target_error;
    s->target = target;
    s->mode = mode;
    bdrv_set_dirty_tracking(bs, true);
    bdrv_set_enable_write_cache(s->target, true);
    bdrv_set_on_error(s->target, on_target_error, on_target_error);
    bdrv_iostatus_enable(s->target);
    s->common.co = qemu_coroutine_create(mirror_run);
    trace_mirror_start(bs, s, s->common.co, opaque);
    qemu_coroutine_enter(s->common.co, s);
}