root / block / mirror.c @ 737e150e
History | View | Annotate | Download (10 kB)
1 |
/*
|
---|---|
2 |
* Image mirroring
|
3 |
*
|
4 |
* Copyright Red Hat, Inc. 2012
|
5 |
*
|
6 |
* Authors:
|
7 |
* Paolo Bonzini <pbonzini@redhat.com>
|
8 |
*
|
9 |
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
|
10 |
* See the COPYING.LIB file in the top-level directory.
|
11 |
*
|
12 |
*/
|
13 |
|
14 |
#include "trace.h" |
15 |
#include "block/blockjob.h" |
16 |
#include "block/block_int.h" |
17 |
#include "qemu/ratelimit.h" |
18 |
|
enum {
    /*
     * Size of data buffer for populating the image file. This should be large
     * enough to process multiple clusters in a single call, so that populating
     * contiguous regions of the image is efficient.
     */
    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
};

/* Length of one scheduling slice: used both as the rate-limit window in
 * mirror_set_speed() and as the idle sleep between polls once the mirror
 * has converged (see mirror_run).  */
#define SLICE_TIME 100000000ULL /* ns */
|
/* State of an active mirror block job.  */
typedef struct MirrorBlockJob {
    BlockJob common;                /* generic block-job state (embedded) */
    RateLimit limit;                /* throttling set via mirror_set_speed() */
    BlockDriverState *target;       /* destination that dirty chunks go to */
    MirrorSyncMode mode;            /* which sectors to pre-dirty (full/top/none) */
    BlockdevOnError on_source_error, on_target_error; /* error policies for
                                     * source reads / target writes */
    bool synced;                    /* true once source and target converged and
                                     * block_job_ready() has been reported */
    bool should_complete;           /* set by mirror_complete() to request a
                                     * graceful pivot to the target */
    int64_t sector_num;             /* search position for the next dirty
                                     * sector; -1 before the first iteration */
    uint8_t *buf;                   /* bounce buffer of BLOCK_SIZE bytes */
} MirrorBlockJob;
41 |
|
42 |
static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, |
43 |
int error)
|
44 |
{ |
45 |
s->synced = false;
|
46 |
if (read) {
|
47 |
return block_job_error_action(&s->common, s->common.bs,
|
48 |
s->on_source_error, true, error);
|
49 |
} else {
|
50 |
return block_job_error_action(&s->common, s->target,
|
51 |
s->on_target_error, false, error);
|
52 |
} |
53 |
} |
54 |
|
55 |
static int coroutine_fn mirror_iteration(MirrorBlockJob *s, |
56 |
BlockErrorAction *p_action) |
57 |
{ |
58 |
BlockDriverState *source = s->common.bs; |
59 |
BlockDriverState *target = s->target; |
60 |
QEMUIOVector qiov; |
61 |
int ret, nb_sectors;
|
62 |
int64_t end; |
63 |
struct iovec iov;
|
64 |
|
65 |
end = s->common.len >> BDRV_SECTOR_BITS; |
66 |
s->sector_num = bdrv_get_next_dirty(source, s->sector_num); |
67 |
nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); |
68 |
bdrv_reset_dirty(source, s->sector_num, nb_sectors); |
69 |
|
70 |
/* Copy the dirty cluster. */
|
71 |
iov.iov_base = s->buf; |
72 |
iov.iov_len = nb_sectors * 512;
|
73 |
qemu_iovec_init_external(&qiov, &iov, 1);
|
74 |
|
75 |
trace_mirror_one_iteration(s, s->sector_num, nb_sectors); |
76 |
ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); |
77 |
if (ret < 0) { |
78 |
*p_action = mirror_error_action(s, true, -ret);
|
79 |
goto fail;
|
80 |
} |
81 |
ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); |
82 |
if (ret < 0) { |
83 |
*p_action = mirror_error_action(s, false, -ret);
|
84 |
s->synced = false;
|
85 |
goto fail;
|
86 |
} |
87 |
return 0; |
88 |
|
89 |
fail:
|
90 |
/* Try again later. */
|
91 |
bdrv_set_dirty(source, s->sector_num, nb_sectors); |
92 |
return ret;
|
93 |
} |
94 |
|
/*
 * Coroutine body of the mirror job.
 *
 * Phase 1 (unless mode is MIRROR_SYNC_MODE_NONE): walk the device and
 * pre-dirty every allocated chunk, so the copy loop below will transfer it.
 * Phase 2: repeatedly copy dirty chunks to the target until the dirty count
 * drops to zero, then report readiness and either keep mirroring guest
 * writes or complete/pivot when requested.
 *
 * On exit (any path reaching immediate_exit) the buffer and target are
 * released and block_job_completed() reports the final status.
 */
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end;
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    /* bdrv_getlength() returns a negative errno on failure. */
    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, BLOCK_SIZE);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap. */
        BlockDriverState *base;
        /* FULL mode copies everything (base == NULL); TOP only what is
         * allocated above the backing file. */
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            /* Round up to the next dirty-chunk boundary. */
            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    /* -1 makes mirror_iteration() start searching from the beginning. */
    s->sector_num = -1;
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        cnt = bdrv_get_dirty_count(bs);
        if (cnt != 0) {
            BlockErrorAction action = BDRV_ACTION_REPORT;
            ret = mirror_iteration(s, &action);
            if (ret < 0 && action == BDRV_ACTION_REPORT) {
                goto immediate_exit;
            }
            /* The iteration (or the guest) may have changed the count. */
            cnt = bdrv_get_dirty_count(bs);
        }

        should_complete = false;
        if (cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase. From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion. This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
            } else {
                delay_ns = 0;
            }

            /* Note that even when no rate limit is applied we need to yield
             * with no pending I/O here so that bdrv_drain_all() returns.
             */
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            /* Synced but nobody asked to finish: idle-poll for new writes. */
            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync. Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
    }

immediate_exit:
    g_free(s->buf);
    bdrv_set_dirty_tracking(bs, false);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        /* Pivot: reopen the target with the source's flags if needed, then
         * swap so the device continues on the (former) target. */
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_delete(s->target);
    block_job_completed(&s->common, ret);
}
241 |
|
242 |
static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) |
243 |
{ |
244 |
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); |
245 |
|
246 |
if (speed < 0) { |
247 |
error_set(errp, QERR_INVALID_PARAMETER, "speed");
|
248 |
return;
|
249 |
} |
250 |
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); |
251 |
} |
252 |
|
253 |
static void mirror_iostatus_reset(BlockJob *job) |
254 |
{ |
255 |
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); |
256 |
|
257 |
bdrv_iostatus_reset(s->target); |
258 |
} |
259 |
|
260 |
static void mirror_complete(BlockJob *job, Error **errp) |
261 |
{ |
262 |
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); |
263 |
int ret;
|
264 |
|
265 |
ret = bdrv_open_backing_file(s->target); |
266 |
if (ret < 0) { |
267 |
char backing_filename[PATH_MAX];
|
268 |
bdrv_get_full_backing_filename(s->target, backing_filename, |
269 |
sizeof(backing_filename));
|
270 |
error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); |
271 |
return;
|
272 |
} |
273 |
if (!s->synced) {
|
274 |
error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); |
275 |
return;
|
276 |
} |
277 |
|
278 |
s->should_complete = true;
|
279 |
block_job_resume(job); |
280 |
} |
281 |
|
282 |
static BlockJobType mirror_job_type = {
|
283 |
.instance_size = sizeof(MirrorBlockJob),
|
284 |
.job_type = "mirror",
|
285 |
.set_speed = mirror_set_speed, |
286 |
.iostatus_reset= mirror_iostatus_reset, |
287 |
.complete = mirror_complete, |
288 |
}; |
289 |
|
290 |
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
|
291 |
int64_t speed, MirrorSyncMode mode, |
292 |
BlockdevOnError on_source_error, |
293 |
BlockdevOnError on_target_error, |
294 |
BlockDriverCompletionFunc *cb, |
295 |
void *opaque, Error **errp)
|
296 |
{ |
297 |
MirrorBlockJob *s; |
298 |
|
299 |
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
|
300 |
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && |
301 |
!bdrv_iostatus_is_enabled(bs)) { |
302 |
error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
|
303 |
return;
|
304 |
} |
305 |
|
306 |
s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); |
307 |
if (!s) {
|
308 |
return;
|
309 |
} |
310 |
|
311 |
s->on_source_error = on_source_error; |
312 |
s->on_target_error = on_target_error; |
313 |
s->target = target; |
314 |
s->mode = mode; |
315 |
bdrv_set_dirty_tracking(bs, true);
|
316 |
bdrv_set_enable_write_cache(s->target, true);
|
317 |
bdrv_set_on_error(s->target, on_target_error, on_target_error); |
318 |
bdrv_iostatus_enable(s->target); |
319 |
s->common.co = qemu_coroutine_create(mirror_run); |
320 |
trace_mirror_start(bs, s, s->common.co, opaque); |
321 |
qemu_coroutine_enter(s->common.co, s); |
322 |
} |