root / block / stream.c @ 469ef350
History | View | Annotate | Download (7.9 kB)
1 |
/*
|
---|---|
2 |
* Image streaming
|
3 |
*
|
4 |
* Copyright IBM, Corp. 2011
|
5 |
*
|
6 |
* Authors:
|
7 |
* Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
|
8 |
*
|
9 |
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
|
10 |
* See the COPYING.LIB file in the top-level directory.
|
11 |
*
|
12 |
*/
|
13 |
|
14 |
#include "trace.h" |
15 |
#include "block_int.h" |
16 |
|
17 |
enum {
    /*
     * Size of data buffer for populating the image file. This should be large
     * enough to process multiple clusters in a single call, so that populating
     * contiguous regions of the image is efficient.
     *
     * stream_run() allocates one buffer of this size and copies at most this
     * many bytes per iteration.
     */
    STREAM_BUFFER_SIZE = 512 * 1024, /* in bytes */
};
25 |
|
26 |
/* Length of one rate-limit accounting slice */
#define SLICE_TIME 100000000ULL /* ns */

/* Simple token-bucket style rate limiter; see ratelimit_calculate_delay() */
typedef struct {
    int64_t next_slice_time;    /* end of the current slice, in rt_clock ns */
    uint64_t slice_quota;       /* max units that may be dispatched per slice */
    uint64_t dispatched;        /* units dispatched in the current slice */
} RateLimit;
33 |
|
34 |
/*
 * Account for n units of work and compute how long the caller should wait.
 *
 * Returns 0 if n units fit into the current slice's quota (and charges them),
 * otherwise the number of nanoseconds until the current slice expires.
 */
static int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
{
    int64_t now = qemu_get_clock_ns(rt_clock);

    /* Start a fresh accounting slice if the previous one has expired */
    if (limit->next_slice_time < now) {
        limit->dispatched = 0;
        limit->next_slice_time = now + SLICE_TIME;
    }

    /* Quota would be exceeded: tell the caller to sleep out the slice */
    if (limit->dispatched + n > limit->slice_quota) {
        return limit->next_slice_time - now;
    }

    limit->dispatched += n;
    return 0;
}
50 |
|
51 |
/* Set the rate limit to `speed` units per second */
static void ratelimit_set_speed(RateLimit *limit, uint64_t speed)
{
    /* Convert the per-second rate into a per-slice quota */
    uint64_t slices_per_sec = 1000000000ULL / SLICE_TIME;

    limit->slice_quota = speed / slices_per_sec;
}
55 |
|
56 |
/* State for a single image-streaming block job */
typedef struct StreamBlockJob {
    BlockJob common;            /* embedded generic block job state */
    RateLimit limit;            /* throttles copying when common.speed is set */
    BlockDriverState *base;     /* stop streaming at this image, or NULL to
                                 * flatten the entire backing chain */
    char backing_file_id[1024]; /* backing file string recorded into the image
                                 * on successful completion (when base != NULL) */
} StreamBlockJob;
62 |
|
63 |
/*
 * Copy nb_sectors starting at sector_num into this image via copy-on-read.
 *
 * buf must be at least nb_sectors * BDRV_SECTOR_SIZE bytes; its contents
 * after the call are scratch data.  Returns the bdrv_co_copy_on_readv()
 * result (0 on success, negative errno on failure).
 */
static int coroutine_fn stream_populate(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors,
                                        void *buf)
{
    QEMUIOVector qiov;
    struct iovec iov;

    iov.iov_base = buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);

    /* Copy-on-read pulls the unallocated clusters into this image */
    return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
}
78 |
|
79 |
/*
 * Delete every image strictly between top and base, then make base the
 * direct backing file of top.  base may be NULL, in which case the whole
 * backing chain below top is deleted.
 *
 * base_id is currently unused.
 */
static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
                                const char *base_id)
{
    BlockDriverState *bs = top->backing_hd;

    while (bs && bs != base) {
        BlockDriverState *next = bs->backing_hd;

        /* Detach before deleting so the chain is never left dangling */
        bs->backing_hd = NULL;
        bdrv_delete(bs);
        bs = next;
    }

    top->backing_hd = base;
}
100 |
|
101 |
/*
 * Given an image chain: [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return 1 (true) if the given sector is allocated in top.
 * Return 0 (false) if the given sector is allocated in an intermediate
 * image between base (exclusive) and top (exclusive).
 * Return 1 (true) otherwise, i.e. the sector is unallocated everywhere
 * above base, so streaming has nothing left to copy for it.
 * A negative errno from bdrv_co_is_allocated() is propagated unchanged.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 */
static int coroutine_fn is_allocated_base(BlockDriverState *top,
                                          BlockDriverState *base,
                                          int64_t sector_num,
                                          int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n;

    /* Allocated in top itself: nothing below matters */
    ret = bdrv_co_is_allocated(top, sector_num, nb_sectors, &n);
    if (ret) {
        *pnum = n;
        return ret;
    }

    /*
     * Is the unallocated chunk [sector_num, n] also
     * unallocated between base and top?
     */
    intermediate = top->backing_hd;

    while (intermediate) {
        int pnum_inter;

        /* reached base */
        if (intermediate == base) {
            *pnum = n;
            return 1;
        }
        ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
                                   &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            /* Allocated in an intermediate image: must be streamed */
            *pnum = pnum_inter;
            return 0;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         *
         * Shrink the unallocated run so it is valid for every image
         * inspected so far.
         */
        if (n > pnum_inter) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    /* Walked the whole chain without hitting base (base is NULL or above) */
    return 1;
}
165 |
|
166 |
/*
 * Coroutine body of the streaming job: walk the image from sector 0 to the
 * end, copy-on-read every run of sectors that is not already allocated in
 * the top image (or, with a base, allocated between base and top), honour
 * the rate limit and cancellation, and on success rewrite the backing file
 * link and delete the now-unused intermediate images.
 *
 * opaque is the StreamBlockJob created by stream_start().  Always finishes
 * by calling block_job_complete() with the final return code.
 */
static void coroutine_fn stream_run(void *opaque)
{
    StreamBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    BlockDriverState *base = s->base;
    int64_t sector_num, end;
    int ret = 0;
    int n;
    void *buf;

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
        /* Propagate the bdrv_getlength() error code as the job result */
        block_job_complete(&s->common, s->common.len);
        return;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);

    /* Turn on copy-on-read for the whole block device so that guest read
     * requests help us make progress.  Only do this when copying the entire
     * backing chain since the copy-on-read operation does not take base into
     * account.
     */
    if (!base) {
        bdrv_enable_copy_on_read(bs);
    }

    for (sector_num = 0; sector_num < end; sector_num += n) {
retry:
        if (block_job_is_cancelled(&s->common)) {
            break;
        }

        /* busy is cleared again before every yield point below */
        s->common.busy = true;
        if (base) {
            ret = is_allocated_base(bs, base, sector_num,
                                    STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
        } else {
            ret = bdrv_co_is_allocated(bs, sector_num,
                                       STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE,
                                       &n);
        }
        trace_stream_one_iteration(s, sector_num, n, ret);
        if (ret == 0) {
            /* Sectors are unallocated here: copy them in (rate-limited) */
            if (s->common.speed) {
                uint64_t delay_ns = ratelimit_calculate_delay(&s->limit, n);
                if (delay_ns > 0) {
                    s->common.busy = false;
                    co_sleep_ns(rt_clock, delay_ns);

                    /* Recheck cancellation and that sectors are unallocated */
                    goto retry;
                }
            }
            ret = stream_populate(bs, sector_num, n, buf);
        }
        if (ret < 0) {
            break;
        }
        ret = 0;

        /* Publish progress */
        s->common.offset += n * BDRV_SECTOR_SIZE;

        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that qemu_aio_flush() returns.
         */
        s->common.busy = false;
        co_sleep_ns(rt_clock, 0);
    }

    if (!base) {
        bdrv_disable_copy_on_read(bs);
    }

    /* Only rewrite the backing link after a full, successful pass */
    if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
        const char *base_id = NULL;
        if (base) {
            base_id = s->backing_file_id;
        }
        ret = bdrv_change_backing_file(bs, base_id, NULL);
        close_unused_images(bs, base, base_id);
    }

    qemu_vfree(buf);
    block_job_complete(&s->common, ret);
}
254 |
|
255 |
static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp) |
256 |
{ |
257 |
StreamBlockJob *s = container_of(job, StreamBlockJob, common); |
258 |
|
259 |
if (speed < 0) { |
260 |
error_set(errp, QERR_INVALID_PARAMETER, "speed");
|
261 |
return;
|
262 |
} |
263 |
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE); |
264 |
} |
265 |
|
266 |
/* Block job driver ops for image streaming */
static BlockJobType stream_job_type = {
    .instance_size = sizeof(StreamBlockJob),
    .job_type      = "stream",
    .set_speed     = stream_set_speed,
};
271 |
|
272 |
void stream_start(BlockDriverState *bs, BlockDriverState *base,
|
273 |
const char *base_id, int64_t speed, |
274 |
BlockDriverCompletionFunc *cb, |
275 |
void *opaque, Error **errp)
|
276 |
{ |
277 |
StreamBlockJob *s; |
278 |
Coroutine *co; |
279 |
|
280 |
s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp); |
281 |
if (!s) {
|
282 |
return;
|
283 |
} |
284 |
|
285 |
s->base = base; |
286 |
if (base_id) {
|
287 |
pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
|
288 |
} |
289 |
|
290 |
co = qemu_coroutine_create(stream_run); |
291 |
trace_stream_start(bs, base, s, co, opaque); |
292 |
qemu_coroutine_enter(co, s); |
293 |
} |