Statistics
| Branch: | Revision:

root / block-migration.c @ ed2c54d4

History | View | Annotate | Download (16.8 kB)

1 c163b5ca lirans@il.ibm.com
/*
2 c163b5ca lirans@il.ibm.com
 * QEMU live block migration
3 c163b5ca lirans@il.ibm.com
 *
4 c163b5ca lirans@il.ibm.com
 * Copyright IBM, Corp. 2009
5 c163b5ca lirans@il.ibm.com
 *
6 c163b5ca lirans@il.ibm.com
 * Authors:
7 c163b5ca lirans@il.ibm.com
 *  Liran Schour   <lirans@il.ibm.com>
8 c163b5ca lirans@il.ibm.com
 *
9 c163b5ca lirans@il.ibm.com
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10 c163b5ca lirans@il.ibm.com
 * the COPYING file in the top-level directory.
11 c163b5ca lirans@il.ibm.com
 *
12 c163b5ca lirans@il.ibm.com
 */
13 c163b5ca lirans@il.ibm.com
14 c163b5ca lirans@il.ibm.com
#include "qemu-common.h"
15 c163b5ca lirans@il.ibm.com
#include "block_int.h"
16 c163b5ca lirans@il.ibm.com
#include "hw/hw.h"
17 5e5328be Jan Kiszka
#include "qemu-queue.h"
18 889ae39c Liran Schour
#include "qemu-timer.h"
19 7184049e Jan Kiszka
#include "monitor.h"
20 c163b5ca lirans@il.ibm.com
#include "block-migration.h"
21 889ae39c Liran Schour
#include "migration.h"
22 c163b5ca lirans@il.ibm.com
#include <assert.h>
23 c163b5ca lirans@il.ibm.com
24 6ea44308 Jan Kiszka
#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
25 c163b5ca lirans@il.ibm.com
26 c163b5ca lirans@il.ibm.com
#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
27 c163b5ca lirans@il.ibm.com
#define BLK_MIG_FLAG_EOS                0x02
28 01e61e2d Jan Kiszka
#define BLK_MIG_FLAG_PROGRESS           0x04
29 c163b5ca lirans@il.ibm.com
30 c163b5ca lirans@il.ibm.com
#define MAX_IS_ALLOCATED_SEARCH 65536
31 c163b5ca lirans@il.ibm.com
32 c163b5ca lirans@il.ibm.com
//#define DEBUG_BLK_MIGRATION
33 c163b5ca lirans@il.ibm.com
34 c163b5ca lirans@il.ibm.com
#ifdef DEBUG_BLK_MIGRATION
35 d0f2c4c6 malc
#define DPRINTF(fmt, ...) \
36 c163b5ca lirans@il.ibm.com
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
37 c163b5ca lirans@il.ibm.com
#else
38 d0f2c4c6 malc
#define DPRINTF(fmt, ...) \
39 c163b5ca lirans@il.ibm.com
    do { } while (0)
40 c163b5ca lirans@il.ibm.com
#endif
41 c163b5ca lirans@il.ibm.com
42 a55eb92c Jan Kiszka
typedef struct BlkMigDevState {
43 a55eb92c Jan Kiszka
    BlockDriverState *bs;
44 a55eb92c Jan Kiszka
    int bulk_completed;
45 a55eb92c Jan Kiszka
    int shared_base;
46 a55eb92c Jan Kiszka
    int64_t cur_sector;
47 d76cac7d Liran Schour
    int64_t cur_dirty;
48 82801d8f Jan Kiszka
    int64_t completed_sectors;
49 a55eb92c Jan Kiszka
    int64_t total_sectors;
50 a55eb92c Jan Kiszka
    int64_t dirty;
51 5e5328be Jan Kiszka
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;
52 a55eb92c Jan Kiszka
} BlkMigDevState;
53 a55eb92c Jan Kiszka
54 c163b5ca lirans@il.ibm.com
typedef struct BlkMigBlock {
55 c163b5ca lirans@il.ibm.com
    uint8_t *buf;
56 c163b5ca lirans@il.ibm.com
    BlkMigDevState *bmds;
57 c163b5ca lirans@il.ibm.com
    int64_t sector;
58 c163b5ca lirans@il.ibm.com
    struct iovec iov;
59 c163b5ca lirans@il.ibm.com
    QEMUIOVector qiov;
60 c163b5ca lirans@il.ibm.com
    BlockDriverAIOCB *aiocb;
61 c163b5ca lirans@il.ibm.com
    int ret;
62 889ae39c Liran Schour
    int64_t time;
63 5e5328be Jan Kiszka
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
64 c163b5ca lirans@il.ibm.com
} BlkMigBlock;
65 c163b5ca lirans@il.ibm.com
66 c163b5ca lirans@il.ibm.com
typedef struct BlkMigState {
67 c163b5ca lirans@il.ibm.com
    int blk_enable;
68 c163b5ca lirans@il.ibm.com
    int shared_base;
69 5e5328be Jan Kiszka
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
70 5e5328be Jan Kiszka
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
71 c163b5ca lirans@il.ibm.com
    int submitted;
72 c163b5ca lirans@il.ibm.com
    int read_done;
73 c163b5ca lirans@il.ibm.com
    int transferred;
74 82801d8f Jan Kiszka
    int64_t total_sector_sum;
75 01e61e2d Jan Kiszka
    int prev_progress;
76 e970ec0b Liran Schour
    int bulk_completed;
77 889ae39c Liran Schour
    long double total_time;
78 889ae39c Liran Schour
    int reads;
79 c163b5ca lirans@il.ibm.com
} BlkMigState;
80 c163b5ca lirans@il.ibm.com
81 d11ecd3d Jan Kiszka
static BlkMigState block_mig_state;
82 c163b5ca lirans@il.ibm.com
83 13f0b67f Jan Kiszka
/*
 * Write one block to the migration stream.
 *
 * Layout: a 64-bit word holding the sector number shifted left by
 * BDRV_SECTOR_BITS with BLK_MIG_FLAG_DEVICE_BLOCK or'ed in, the device name
 * as a one-byte length followed by the characters, then BLOCK_SIZE bytes
 * taken from blk->buf.
 */
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
    char *name = blk->bmds->bs->device_name;
    int name_len = strlen(name);

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | BLK_MIG_FLAG_DEVICE_BLOCK);

    /* device name */
    qemu_put_byte(f, name_len);
    qemu_put_buffer(f, (uint8_t *)name, name_len);

    /* block data */
    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}
98 13f0b67f Jan Kiszka
99 25f23643 Jan Kiszka
int blk_mig_active(void)
100 25f23643 Jan Kiszka
{
101 25f23643 Jan Kiszka
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
102 25f23643 Jan Kiszka
}
103 25f23643 Jan Kiszka
104 25f23643 Jan Kiszka
uint64_t blk_mig_bytes_transferred(void)
105 25f23643 Jan Kiszka
{
106 25f23643 Jan Kiszka
    BlkMigDevState *bmds;
107 25f23643 Jan Kiszka
    uint64_t sum = 0;
108 25f23643 Jan Kiszka
109 25f23643 Jan Kiszka
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
110 25f23643 Jan Kiszka
        sum += bmds->completed_sectors;
111 25f23643 Jan Kiszka
    }
112 25f23643 Jan Kiszka
    return sum << BDRV_SECTOR_BITS;
113 25f23643 Jan Kiszka
}
114 25f23643 Jan Kiszka
115 25f23643 Jan Kiszka
/* Difference between blk_mig_bytes_total() and blk_mig_bytes_transferred(). */
uint64_t blk_mig_bytes_remaining(void)
{
    uint64_t total = blk_mig_bytes_total();
    uint64_t done = blk_mig_bytes_transferred();

    return total - done;
}
119 25f23643 Jan Kiszka
120 25f23643 Jan Kiszka
uint64_t blk_mig_bytes_total(void)
121 25f23643 Jan Kiszka
{
122 25f23643 Jan Kiszka
    BlkMigDevState *bmds;
123 25f23643 Jan Kiszka
    uint64_t sum = 0;
124 25f23643 Jan Kiszka
125 25f23643 Jan Kiszka
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
126 25f23643 Jan Kiszka
        sum += bmds->total_sectors;
127 25f23643 Jan Kiszka
    }
128 25f23643 Jan Kiszka
    return sum << BDRV_SECTOR_BITS;
129 25f23643 Jan Kiszka
}
130 25f23643 Jan Kiszka
131 889ae39c Liran Schour
static inline void add_avg_read_time(int64_t time)
132 889ae39c Liran Schour
{
133 889ae39c Liran Schour
    block_mig_state.reads++;
134 889ae39c Liran Schour
    block_mig_state.total_time += time;
135 889ae39c Liran Schour
}
136 889ae39c Liran Schour
137 889ae39c Liran Schour
static inline long double compute_read_bwidth(void)
138 889ae39c Liran Schour
{
139 889ae39c Liran Schour
    assert(block_mig_state.total_time != 0);
140 889ae39c Liran Schour
    return  (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time;
141 889ae39c Liran Schour
}
142 889ae39c Liran Schour
143 c163b5ca lirans@il.ibm.com
static void blk_mig_read_cb(void *opaque, int ret)
144 c163b5ca lirans@il.ibm.com
{
145 c163b5ca lirans@il.ibm.com
    BlkMigBlock *blk = opaque;
146 a55eb92c Jan Kiszka
147 c163b5ca lirans@il.ibm.com
    blk->ret = ret;
148 a55eb92c Jan Kiszka
149 889ae39c Liran Schour
    blk->time = qemu_get_clock_ns(rt_clock) - blk->time;
150 889ae39c Liran Schour
151 889ae39c Liran Schour
    add_avg_read_time(blk->time);
152 889ae39c Liran Schour
153 5e5328be Jan Kiszka
    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
154 a55eb92c Jan Kiszka
155 d11ecd3d Jan Kiszka
    block_mig_state.submitted--;
156 d11ecd3d Jan Kiszka
    block_mig_state.read_done++;
157 d11ecd3d Jan Kiszka
    assert(block_mig_state.submitted >= 0);
158 c163b5ca lirans@il.ibm.com
}
159 c163b5ca lirans@il.ibm.com
160 7184049e Jan Kiszka
static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
161 e970ec0b Liran Schour
                                BlkMigDevState *bmds)
162 a55eb92c Jan Kiszka
{
163 57cce12d Jan Kiszka
    int64_t total_sectors = bmds->total_sectors;
164 57cce12d Jan Kiszka
    int64_t cur_sector = bmds->cur_sector;
165 57cce12d Jan Kiszka
    BlockDriverState *bs = bmds->bs;
166 c163b5ca lirans@il.ibm.com
    BlkMigBlock *blk;
167 13f0b67f Jan Kiszka
    int nr_sectors;
168 a55eb92c Jan Kiszka
169 57cce12d Jan Kiszka
    if (bmds->shared_base) {
170 b1d10856 Jan Kiszka
        while (cur_sector < total_sectors &&
171 57cce12d Jan Kiszka
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
172 57cce12d Jan Kiszka
                                  &nr_sectors)) {
173 c163b5ca lirans@il.ibm.com
            cur_sector += nr_sectors;
174 c163b5ca lirans@il.ibm.com
        }
175 c163b5ca lirans@il.ibm.com
    }
176 a55eb92c Jan Kiszka
177 a55eb92c Jan Kiszka
    if (cur_sector >= total_sectors) {
178 82801d8f Jan Kiszka
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
179 c163b5ca lirans@il.ibm.com
        return 1;
180 c163b5ca lirans@il.ibm.com
    }
181 a55eb92c Jan Kiszka
182 82801d8f Jan Kiszka
    bmds->completed_sectors = cur_sector;
183 a55eb92c Jan Kiszka
184 57cce12d Jan Kiszka
    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
185 57cce12d Jan Kiszka
186 6ea44308 Jan Kiszka
    /* we are going to transfer a full block even if it is not allocated */
187 6ea44308 Jan Kiszka
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
188 c163b5ca lirans@il.ibm.com
189 6ea44308 Jan Kiszka
    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
190 57cce12d Jan Kiszka
        nr_sectors = total_sectors - cur_sector;
191 c163b5ca lirans@il.ibm.com
    }
192 a55eb92c Jan Kiszka
193 13f0b67f Jan Kiszka
    blk = qemu_malloc(sizeof(BlkMigBlock));
194 13f0b67f Jan Kiszka
    blk->buf = qemu_malloc(BLOCK_SIZE);
195 13f0b67f Jan Kiszka
    blk->bmds = bmds;
196 13f0b67f Jan Kiszka
    blk->sector = cur_sector;
197 a55eb92c Jan Kiszka
198 e970ec0b Liran Schour
    blk->iov.iov_base = blk->buf;
199 e970ec0b Liran Schour
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
200 e970ec0b Liran Schour
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
201 a55eb92c Jan Kiszka
202 889ae39c Liran Schour
    blk->time = qemu_get_clock_ns(rt_clock);
203 889ae39c Liran Schour
204 e970ec0b Liran Schour
    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
205 e970ec0b Liran Schour
                                nr_sectors, blk_mig_read_cb, blk);
206 e970ec0b Liran Schour
    if (!blk->aiocb) {
207 e970ec0b Liran Schour
        goto error;
208 c163b5ca lirans@il.ibm.com
    }
209 e970ec0b Liran Schour
    block_mig_state.submitted++;
210 d76cac7d Liran Schour
211 13f0b67f Jan Kiszka
    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
212 13f0b67f Jan Kiszka
    bmds->cur_sector = cur_sector + nr_sectors;
213 a55eb92c Jan Kiszka
214 13f0b67f Jan Kiszka
    return (bmds->cur_sector >= total_sectors);
215 4b640365 Jan Kiszka
216 4b640365 Jan Kiszka
error:
217 7184049e Jan Kiszka
    monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
218 4b640365 Jan Kiszka
    qemu_file_set_error(f);
219 4b640365 Jan Kiszka
    qemu_free(blk->buf);
220 4b640365 Jan Kiszka
    qemu_free(blk);
221 4b640365 Jan Kiszka
    return 0;
222 c163b5ca lirans@il.ibm.com
}
223 c163b5ca lirans@il.ibm.com
224 c163b5ca lirans@il.ibm.com
static void set_dirty_tracking(int enable)
225 c163b5ca lirans@il.ibm.com
{
226 c163b5ca lirans@il.ibm.com
    BlkMigDevState *bmds;
227 5e5328be Jan Kiszka
228 5e5328be Jan Kiszka
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
229 a55eb92c Jan Kiszka
        bdrv_set_dirty_tracking(bmds->bs, enable);
230 c163b5ca lirans@il.ibm.com
    }
231 c163b5ca lirans@il.ibm.com
}
232 c163b5ca lirans@il.ibm.com
233 7184049e Jan Kiszka
/*
 * Reset the global counters and build the list of devices to migrate.
 *
 * Every device on the bdrv_first list whose type is BDRV_TYPE_HD and whose
 * length is at least one sector gets a BlkMigDevState appended to
 * block_mig_state.bmds_list; a start message is printed on mon for each.
 * f is not used.
 */
static void init_blk_migration(Monitor *mon, QEMUFile *f)
{
    BlkMigDevState *bmds;
    BlockDriverState *bs;
    int64_t sectors;

    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;
    block_mig_state.total_time = 0;
    block_mig_state.reads = 0;

    for (bs = bdrv_first; bs != NULL; bs = bs->next) {
        if (bs->type == BDRV_TYPE_HD) {
            sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
            /*
             * Skip empty devices, and also devices whose length query
             * yielded a negative value: registering those would give
             * negative sector totals to every later computation.
             */
            if (sectors <= 0) {
                continue;
            }

            bmds = qemu_mallocz(sizeof(BlkMigDevState));
            bmds->bs = bs;
            bmds->bulk_completed = 0;
            bmds->total_sectors = sectors;
            bmds->completed_sectors = 0;
            bmds->shared_base = block_mig_state.shared_base;

            block_mig_state.total_sector_sum += sectors;

            if (bmds->shared_base) {
                monitor_printf(mon, "Start migration for %s with shared base "
                                    "image\n",
                               bs->device_name);
            } else {
                monitor_printf(mon, "Start full migration for %s\n",
                               bs->device_name);
            }

            QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
        }
    }
}
277 c163b5ca lirans@il.ibm.com
278 e970ec0b Liran Schour
/*
 * Advance the bulk phase by one block.
 *
 * Finds the first device whose bulk pass is not finished and issues one read
 * for it.  Afterwards the overall progress percentage is recomputed; when it
 * changed, a BLK_MIG_FLAG_PROGRESS record is written to f and the value is
 * printed on mon.
 *
 * Returns 1 if a device with pending bulk work was found, 0 when every
 * device has completed its bulk pass.
 */
static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(mon, f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    /*
     * total_sector_sum is 0 when no device was registered; there is nothing
     * to transfer then, so report completion instead of dividing by zero.
     */
    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }

    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, ((uint64_t)progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        monitor_printf(mon, "Completed %d %%\r", progress);
        monitor_flush(mon);
    }

    return ret;
}
310 c163b5ca lirans@il.ibm.com
311 d76cac7d Liran Schour
static void blk_mig_reset_dirty_cursor(void)
312 c163b5ca lirans@il.ibm.com
{
313 c163b5ca lirans@il.ibm.com
    BlkMigDevState *bmds;
314 d76cac7d Liran Schour
315 d76cac7d Liran Schour
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
316 d76cac7d Liran Schour
        bmds->cur_dirty = 0;
317 d76cac7d Liran Schour
    }
318 d76cac7d Liran Schour
}
319 d76cac7d Liran Schour
320 d76cac7d Liran Schour
static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
321 d76cac7d Liran Schour
                                 BlkMigDevState *bmds, int is_async)
322 d76cac7d Liran Schour
{
323 d76cac7d Liran Schour
    BlkMigBlock *blk;
324 d76cac7d Liran Schour
    int64_t total_sectors = bmds->total_sectors;
325 c163b5ca lirans@il.ibm.com
    int64_t sector;
326 d76cac7d Liran Schour
    int nr_sectors;
327 a55eb92c Jan Kiszka
328 d76cac7d Liran Schour
    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
329 d76cac7d Liran Schour
        if (bdrv_get_dirty(bmds->bs, sector)) {
330 575a58d7 Jan Kiszka
331 d76cac7d Liran Schour
            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
332 d76cac7d Liran Schour
                nr_sectors = total_sectors - sector;
333 d76cac7d Liran Schour
            } else {
334 d76cac7d Liran Schour
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
335 d76cac7d Liran Schour
            }
336 d76cac7d Liran Schour
            blk = qemu_malloc(sizeof(BlkMigBlock));
337 d76cac7d Liran Schour
            blk->buf = qemu_malloc(BLOCK_SIZE);
338 d76cac7d Liran Schour
            blk->bmds = bmds;
339 d76cac7d Liran Schour
            blk->sector = sector;
340 d76cac7d Liran Schour
341 889ae39c Liran Schour
            if (is_async) {
342 d76cac7d Liran Schour
                blk->iov.iov_base = blk->buf;
343 d76cac7d Liran Schour
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
344 d76cac7d Liran Schour
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
345 d76cac7d Liran Schour
346 889ae39c Liran Schour
                blk->time = qemu_get_clock_ns(rt_clock);
347 889ae39c Liran Schour
348 d76cac7d Liran Schour
                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
349 d76cac7d Liran Schour
                                            nr_sectors, blk_mig_read_cb, blk);
350 d76cac7d Liran Schour
                if (!blk->aiocb) {
351 d76cac7d Liran Schour
                    goto error;
352 d76cac7d Liran Schour
                }
353 d76cac7d Liran Schour
                block_mig_state.submitted++;
354 d76cac7d Liran Schour
            } else {
355 d76cac7d Liran Schour
                if (bdrv_read(bmds->bs, sector, blk->buf,
356 d76cac7d Liran Schour
                              nr_sectors) < 0) {
357 d76cac7d Liran Schour
                    goto error;
358 c163b5ca lirans@il.ibm.com
                }
359 d76cac7d Liran Schour
                blk_send(f, blk);
360 a55eb92c Jan Kiszka
361 d76cac7d Liran Schour
                qemu_free(blk->buf);
362 d76cac7d Liran Schour
                qemu_free(blk);
363 a55eb92c Jan Kiszka
            }
364 d76cac7d Liran Schour
365 d76cac7d Liran Schour
            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
366 d76cac7d Liran Schour
            break;
367 c163b5ca lirans@il.ibm.com
        }
368 d76cac7d Liran Schour
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
369 d76cac7d Liran Schour
        bmds->cur_dirty = sector;
370 c163b5ca lirans@il.ibm.com
    }
371 575a58d7 Jan Kiszka
372 d76cac7d Liran Schour
    return (bmds->cur_dirty >= bmds->total_sectors);
373 d76cac7d Liran Schour
374 889ae39c Liran Schour
error:
375 d76cac7d Liran Schour
    monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
376 d76cac7d Liran Schour
    qemu_file_set_error(f);
377 d76cac7d Liran Schour
    qemu_free(blk->buf);
378 d76cac7d Liran Schour
    qemu_free(blk);
379 d76cac7d Liran Schour
    return 0;
380 d76cac7d Liran Schour
}
381 d76cac7d Liran Schour
382 d76cac7d Liran Schour
/*
 * Run mig_save_device_dirty() on the devices in list order, stopping at the
 * first one that returns 0.
 *
 * Returns 1 if such a device was found, 0 if every device reported that its
 * dirty scan is finished.
 */
static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
{
    BlkMigDevState *dev;

    QSIMPLEQ_FOREACH(dev, &block_mig_state.bmds_list, entry) {
        if (mig_save_device_dirty(mon, f, dev, is_async) == 0) {
            return 1;
        }
    }

    return 0;
}
396 c163b5ca lirans@il.ibm.com
397 c163b5ca lirans@il.ibm.com
/*
 * Send the blocks whose read has completed, in queue order.
 *
 * Stops when the queue is empty, when qemu_file_rate_limit() asks to hold
 * back, or when a block carries a read error (the error is then flagged on f
 * and the block stays queued).  Sent blocks are removed and freed.
 */
static void flush_blks(QEMUFile* f)
{
    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    for (;;) {
        BlkMigBlock *head = QSIMPLEQ_FIRST(&block_mig_state.blk_list);

        if (head == NULL || qemu_file_rate_limit(f)) {
            break;
        }
        if (head->ret < 0) {
            qemu_file_set_error(f);
            break;
        }

        blk_send(f, head);

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        qemu_free(head->buf);
        qemu_free(head);

        /* the block moves from "read done" to "transferred" */
        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
}
428 c163b5ca lirans@il.ibm.com
429 889ae39c Liran Schour
static int64_t get_remaining_dirty(void)
430 889ae39c Liran Schour
{
431 889ae39c Liran Schour
    BlkMigDevState *bmds;
432 889ae39c Liran Schour
    int64_t dirty = 0;
433 889ae39c Liran Schour
434 889ae39c Liran Schour
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
435 889ae39c Liran Schour
        dirty += bdrv_get_dirty_count(bmds->bs);
436 889ae39c Liran Schour
    }
437 889ae39c Liran Schour
438 889ae39c Liran Schour
    return dirty * BLOCK_SIZE;
439 889ae39c Liran Schour
}
440 889ae39c Liran Schour
441 c163b5ca lirans@il.ibm.com
static int is_stage2_completed(void)
442 c163b5ca lirans@il.ibm.com
{
443 889ae39c Liran Schour
    int64_t remaining_dirty;
444 889ae39c Liran Schour
    long double bwidth;
445 889ae39c Liran Schour
446 889ae39c Liran Schour
    if (block_mig_state.bulk_completed == 1) {
447 889ae39c Liran Schour
448 889ae39c Liran Schour
        remaining_dirty = get_remaining_dirty();
449 889ae39c Liran Schour
        if (remaining_dirty == 0) {
450 889ae39c Liran Schour
            return 1;
451 889ae39c Liran Schour
        }
452 889ae39c Liran Schour
453 889ae39c Liran Schour
        bwidth = compute_read_bwidth();
454 889ae39c Liran Schour
455 889ae39c Liran Schour
        if ((remaining_dirty / bwidth) <=
456 889ae39c Liran Schour
            migrate_max_downtime()) {
457 889ae39c Liran Schour
            /* finish stage2 because we think that we can finish remaing work
458 889ae39c Liran Schour
               below max_downtime */
459 889ae39c Liran Schour
460 889ae39c Liran Schour
            return 1;
461 889ae39c Liran Schour
        }
462 889ae39c Liran Schour
    }
463 889ae39c Liran Schour
464 889ae39c Liran Schour
    return 0;
465 c163b5ca lirans@il.ibm.com
}
466 c163b5ca lirans@il.ibm.com
467 7184049e Jan Kiszka
/*
 * Tear down the migration state: switch dirty tracking off on every
 * registered device, free the device list, free any blocks still queued for
 * sending, and terminate the progress line on mon.
 */
static void blk_mig_cleanup(Monitor *mon)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    /*
     * Must run before the device list is emptied: set_dirty_tracking()
     * iterates over that list, so calling it afterwards would leave dirty
     * tracking enabled on every device.
     */
    set_dirty_tracking(0);

    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        qemu_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        qemu_free(blk->buf);
        qemu_free(blk);
    }

    monitor_printf(mon, "\n");
}
487 4ec7fcc7 Jan Kiszka
488 f327aa0c Jan Kiszka
/*
 * Live-save handler registered for the "block" section.
 *
 * stage < 0  - migration was cancelled: clean up.
 * stage == 1 - build the device list and start dirty tracking.
 * stage == 2 - iterate: issue bulk reads, then dirty-chunk reads, for as
 *              long as the data in flight stays below the file's rate limit.
 * stage == 3 - final pass: synchronously send every remaining dirty chunk,
 *              clean up and report completion.
 *
 * Every successful pass ends with a BLK_MIG_FLAG_EOS record.
 *
 * Returns 1 when block migration is disabled, or in stage 2 once
 * is_stage2_completed() says the stage may end; returns 0 otherwise,
 * including after an error on f.
 */
static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
{
    DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
            stage, block_mig_state.submitted, block_mig_state.transferred);

    if (stage < 0) {
        blk_mig_cleanup(mon);
        return 0;
    }

    if (block_mig_state.blk_enable != 1) {
        /* no need to migrate storage */
        qemu_put_be64(f, BLK_MIG_FLAG_EOS);
        return 1;
    }

    if (stage == 1) {
        init_blk_migration(mon, f);

        /* start track dirty blocks */
        set_dirty_tracking(1);
    }

    flush_blks(f);

    if (qemu_file_has_error(f)) {
        blk_mig_cleanup(mon);
        return 0;
    }

    blk_mig_reset_dirty_cursor();

    if (stage == 2) {
        /* control the rate of transfer */
        while ((block_mig_state.submitted +
                block_mig_state.read_done) * BLOCK_SIZE <
               qemu_file_get_rate_limit(f)) {
            if (block_mig_state.bulk_completed == 0) {
                /* first finish the bulk phase */
                if (blk_mig_save_bulked_block(mon, f) == 0) {
                    /* finished saving bulk on all devices */
                    block_mig_state.bulk_completed = 1;
                }
            } else {
                if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
                    /* no more dirty blocks */
                    break;
                }
            }

            /*
             * A failed read flags the error on f without advancing the
             * cursor or the in-flight count, so without this check the loop
             * would retry the same sector forever.
             */
            if (qemu_file_has_error(f)) {
                break;
            }
        }

        flush_blks(f);

        if (qemu_file_has_error(f)) {
            blk_mig_cleanup(mon);
            return 0;
        }
    }

    if (stage == 3) {
        /* we know for sure that save bulk is completed and
           all async read completed */
        assert(block_mig_state.submitted == 0);

        while (blk_mig_save_dirty_block(mon, f, 0) != 0) {
            /* same reasoning as above: a read error never clears itself */
            if (qemu_file_has_error(f)) {
                break;
            }
        }
        blk_mig_cleanup(mon);

        /* report completion */
        qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

        if (qemu_file_has_error(f)) {
            return 0;
        }

        monitor_printf(mon, "Block migration completed\n");
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ((stage == 2) && is_stage2_completed());
}
569 c163b5ca lirans@il.ibm.com
570 c163b5ca lirans@il.ibm.com
/*
 * Load handler for the "block" section: consume records from f until a
 * BLK_MIG_FLAG_EOS record is seen.
 *
 * Each record starts with a 64-bit word whose bits below the sector mask
 * carry the flags and whose remaining bits, shifted right by
 * BDRV_SECTOR_BITS, carry a value:
 *   BLK_MIG_FLAG_DEVICE_BLOCK - value is a sector number; a length-prefixed
 *       device name and BLOCK_SIZE bytes of data follow and are written to
 *       the named device.
 *   BLK_MIG_FLAG_PROGRESS     - value is a percentage, printed on stdout.
 *
 * Returns 0 on success, -EINVAL for an unknown device, an unusable device
 * length, an out-of-range sector or unknown flags, -EIO when f reports an
 * error, or the negative value returned by a failed bdrv_write().
 */
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    int64_t total_sectors = 0;
    BlockDriverState *bs;
    BlockDriverState *bs_prev = NULL;
    uint8_t *buf;
    int nr_sectors;
    int ret;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            bs = bdrv_find(device_name);
            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            /* look the device length up once per run of blocks */
            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    fprintf(stderr,
                            "Error getting length of block device %s\n",
                            device_name);
                    return -EINVAL;
                }
            }

            /* the sector number comes from the stream: validate it */
            if (addr < 0 || addr >= total_sectors) {
                fprintf(stderr, "Error sector %" PRId64
                        " out of range for block device %s\n",
                        addr, device_name);
                return -EINVAL;
            }

            /*
             * The sender always transmits a full chunk, but the device may
             * end mid-chunk; write only the sectors that exist.
             */
            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            buf = qemu_malloc(BLOCK_SIZE);

            qemu_get_buffer(f, buf, BLOCK_SIZE);
            ret = bdrv_write(bs, addr, buf, nr_sectors);

            qemu_free(buf);

            /* a failed write must fail the migration, not be dropped */
            if (ret < 0) {
                fprintf(stderr, "Error writing sector %" PRId64
                        " of block device %s\n", addr, device_name);
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown flags\n");
            return -EINVAL;
        }
        if (qemu_file_has_error(f)) {
            return -EIO;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}
623 c163b5ca lirans@il.ibm.com
624 c163b5ca lirans@il.ibm.com
static void block_set_params(int blk_enable, int shared_base, void *opaque)
625 c163b5ca lirans@il.ibm.com
{
626 d11ecd3d Jan Kiszka
    block_mig_state.blk_enable = blk_enable;
627 d11ecd3d Jan Kiszka
    block_mig_state.shared_base = shared_base;
628 a55eb92c Jan Kiszka
629 c163b5ca lirans@il.ibm.com
    /* shared base means that blk_enable = 1 */
630 d11ecd3d Jan Kiszka
    block_mig_state.blk_enable |= shared_base;
631 c163b5ca lirans@il.ibm.com
}
632 c163b5ca lirans@il.ibm.com
633 c163b5ca lirans@il.ibm.com
void blk_mig_init(void)
634 a55eb92c Jan Kiszka
{
635 5e5328be Jan Kiszka
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
636 5e5328be Jan Kiszka
    QSIMPLEQ_INIT(&block_mig_state.blk_list);
637 5e5328be Jan Kiszka
638 a55eb92c Jan Kiszka
    register_savevm_live("block", 0, 1, block_set_params, block_save_live,
639 d11ecd3d Jan Kiszka
                         NULL, block_load, &block_mig_state);
640 c163b5ca lirans@il.ibm.com
}