Statistics
| Branch: | Revision:

root / block-migration.c @ 3329f07b

History | View | Annotate | Download (16.9 kB)

1 c163b5ca lirans@il.ibm.com
/*
2 c163b5ca lirans@il.ibm.com
 * QEMU live block migration
3 c163b5ca lirans@il.ibm.com
 *
4 c163b5ca lirans@il.ibm.com
 * Copyright IBM, Corp. 2009
5 c163b5ca lirans@il.ibm.com
 *
6 c163b5ca lirans@il.ibm.com
 * Authors:
7 c163b5ca lirans@il.ibm.com
 *  Liran Schour   <lirans@il.ibm.com>
8 c163b5ca lirans@il.ibm.com
 *
9 c163b5ca lirans@il.ibm.com
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10 c163b5ca lirans@il.ibm.com
 * the COPYING file in the top-level directory.
11 c163b5ca lirans@il.ibm.com
 *
12 c163b5ca lirans@il.ibm.com
 */
13 c163b5ca lirans@il.ibm.com
14 c163b5ca lirans@il.ibm.com
#include "qemu-common.h"
15 c163b5ca lirans@il.ibm.com
#include "block_int.h"
16 c163b5ca lirans@il.ibm.com
#include "hw/hw.h"
17 5e5328be Jan Kiszka
#include "qemu-queue.h"
18 889ae39c Liran Schour
#include "qemu-timer.h"
19 7184049e Jan Kiszka
#include "monitor.h"
20 c163b5ca lirans@il.ibm.com
#include "block-migration.h"
21 889ae39c Liran Schour
#include "migration.h"
22 c163b5ca lirans@il.ibm.com
#include <assert.h>
23 c163b5ca lirans@il.ibm.com
24 6ea44308 Jan Kiszka
/* Bytes carried by one device-block record: one dirty-tracking chunk. */
#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)

/*
 * Record-type flags.  Each record starts with a 64-bit word; the flags are
 * OR-ed into its low bits and the payload (sector number or progress
 * percentage) is stored shifted left by BDRV_SECTOR_BITS.
 */
#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

/* Sector count passed to bdrv_is_allocated() for each probe. */
#define MAX_IS_ALLOCATED_SEARCH 65536

/* Uncomment to make DPRINTF() print to stdout. */
//#define DEBUG_BLK_MIGRATION

#ifndef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#endif
41 c163b5ca lirans@il.ibm.com
42 a55eb92c Jan Kiszka
typedef struct BlkMigDevState {
43 a55eb92c Jan Kiszka
    BlockDriverState *bs;
44 a55eb92c Jan Kiszka
    int bulk_completed;
45 a55eb92c Jan Kiszka
    int shared_base;
46 a55eb92c Jan Kiszka
    int64_t cur_sector;
47 d76cac7d Liran Schour
    int64_t cur_dirty;
48 82801d8f Jan Kiszka
    int64_t completed_sectors;
49 a55eb92c Jan Kiszka
    int64_t total_sectors;
50 a55eb92c Jan Kiszka
    int64_t dirty;
51 5e5328be Jan Kiszka
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;
52 a55eb92c Jan Kiszka
} BlkMigDevState;
53 a55eb92c Jan Kiszka
54 c163b5ca lirans@il.ibm.com
typedef struct BlkMigBlock {
55 c163b5ca lirans@il.ibm.com
    uint8_t *buf;
56 c163b5ca lirans@il.ibm.com
    BlkMigDevState *bmds;
57 c163b5ca lirans@il.ibm.com
    int64_t sector;
58 c163b5ca lirans@il.ibm.com
    struct iovec iov;
59 c163b5ca lirans@il.ibm.com
    QEMUIOVector qiov;
60 c163b5ca lirans@il.ibm.com
    BlockDriverAIOCB *aiocb;
61 c163b5ca lirans@il.ibm.com
    int ret;
62 889ae39c Liran Schour
    int64_t time;
63 5e5328be Jan Kiszka
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
64 c163b5ca lirans@il.ibm.com
} BlkMigBlock;
65 c163b5ca lirans@il.ibm.com
66 c163b5ca lirans@il.ibm.com
typedef struct BlkMigState {
67 c163b5ca lirans@il.ibm.com
    int blk_enable;
68 c163b5ca lirans@il.ibm.com
    int shared_base;
69 5e5328be Jan Kiszka
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
70 5e5328be Jan Kiszka
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
71 c163b5ca lirans@il.ibm.com
    int submitted;
72 c163b5ca lirans@il.ibm.com
    int read_done;
73 c163b5ca lirans@il.ibm.com
    int transferred;
74 82801d8f Jan Kiszka
    int64_t total_sector_sum;
75 01e61e2d Jan Kiszka
    int prev_progress;
76 e970ec0b Liran Schour
    int bulk_completed;
77 889ae39c Liran Schour
    long double total_time;
78 889ae39c Liran Schour
    int reads;
79 c163b5ca lirans@il.ibm.com
} BlkMigState;
80 c163b5ca lirans@il.ibm.com
81 d11ecd3d Jan Kiszka
static BlkMigState block_mig_state;
82 c163b5ca lirans@il.ibm.com
83 13f0b67f Jan Kiszka
/*
 * Emit one device-block record on the migration stream.
 *
 * Record layout, in order:
 *   - 64-bit big-endian word: sector number shifted left by
 *     BDRV_SECTOR_BITS, OR-ed with BLK_MIG_FLAG_DEVICE_BLOCK
 *   - one byte: length of the device name
 *   - the device name, without terminating NUL
 *   - BLOCK_SIZE bytes taken from blk->buf
 */
static void blk_send(QEMUFile *f, BlkMigBlock *blk)
{
    BlockDriverState *bs = blk->bmds->bs;
    int name_len = strlen(bs->device_name);

    /* header word: position plus record type */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | BLK_MIG_FLAG_DEVICE_BLOCK);

    /* length-prefixed device name */
    qemu_put_byte(f, name_len);
    qemu_put_buffer(f, (uint8_t *)bs->device_name, name_len);

    /* payload */
    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}
98 13f0b67f Jan Kiszka
99 25f23643 Jan Kiszka
int blk_mig_active(void)
100 25f23643 Jan Kiszka
{
101 25f23643 Jan Kiszka
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
102 25f23643 Jan Kiszka
}
103 25f23643 Jan Kiszka
104 25f23643 Jan Kiszka
uint64_t blk_mig_bytes_transferred(void)
105 25f23643 Jan Kiszka
{
106 25f23643 Jan Kiszka
    BlkMigDevState *bmds;
107 25f23643 Jan Kiszka
    uint64_t sum = 0;
108 25f23643 Jan Kiszka
109 25f23643 Jan Kiszka
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
110 25f23643 Jan Kiszka
        sum += bmds->completed_sectors;
111 25f23643 Jan Kiszka
    }
112 25f23643 Jan Kiszka
    return sum << BDRV_SECTOR_BITS;
113 25f23643 Jan Kiszka
}
114 25f23643 Jan Kiszka
115 25f23643 Jan Kiszka
/*
 * Return the number of bytes still outstanding: the total size of all
 * registered devices minus what is accounted as completed.
 */
uint64_t blk_mig_bytes_remaining(void)
{
    uint64_t total = blk_mig_bytes_total();
    uint64_t done = blk_mig_bytes_transferred();

    return total - done;
}
119 25f23643 Jan Kiszka
120 25f23643 Jan Kiszka
uint64_t blk_mig_bytes_total(void)
121 25f23643 Jan Kiszka
{
122 25f23643 Jan Kiszka
    BlkMigDevState *bmds;
123 25f23643 Jan Kiszka
    uint64_t sum = 0;
124 25f23643 Jan Kiszka
125 25f23643 Jan Kiszka
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
126 25f23643 Jan Kiszka
        sum += bmds->total_sectors;
127 25f23643 Jan Kiszka
    }
128 25f23643 Jan Kiszka
    return sum << BDRV_SECTOR_BITS;
129 25f23643 Jan Kiszka
}
130 25f23643 Jan Kiszka
131 889ae39c Liran Schour
static inline void add_avg_read_time(int64_t time)
132 889ae39c Liran Schour
{
133 889ae39c Liran Schour
    block_mig_state.reads++;
134 889ae39c Liran Schour
    block_mig_state.total_time += time;
135 889ae39c Liran Schour
}
136 889ae39c Liran Schour
137 889ae39c Liran Schour
static inline long double compute_read_bwidth(void)
138 889ae39c Liran Schour
{
139 889ae39c Liran Schour
    assert(block_mig_state.total_time != 0);
140 889ae39c Liran Schour
    return  (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time;
141 889ae39c Liran Schour
}
142 889ae39c Liran Schour
143 c163b5ca lirans@il.ibm.com
static void blk_mig_read_cb(void *opaque, int ret)
144 c163b5ca lirans@il.ibm.com
{
145 c163b5ca lirans@il.ibm.com
    BlkMigBlock *blk = opaque;
146 a55eb92c Jan Kiszka
147 c163b5ca lirans@il.ibm.com
    blk->ret = ret;
148 a55eb92c Jan Kiszka
149 889ae39c Liran Schour
    blk->time = qemu_get_clock_ns(rt_clock) - blk->time;
150 889ae39c Liran Schour
151 889ae39c Liran Schour
    add_avg_read_time(blk->time);
152 889ae39c Liran Schour
153 5e5328be Jan Kiszka
    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
154 a55eb92c Jan Kiszka
155 d11ecd3d Jan Kiszka
    block_mig_state.submitted--;
156 d11ecd3d Jan Kiszka
    block_mig_state.read_done++;
157 d11ecd3d Jan Kiszka
    assert(block_mig_state.submitted >= 0);
158 c163b5ca lirans@il.ibm.com
}
159 c163b5ca lirans@il.ibm.com
160 7184049e Jan Kiszka
static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
161 e970ec0b Liran Schour
                                BlkMigDevState *bmds)
162 a55eb92c Jan Kiszka
{
163 57cce12d Jan Kiszka
    int64_t total_sectors = bmds->total_sectors;
164 57cce12d Jan Kiszka
    int64_t cur_sector = bmds->cur_sector;
165 57cce12d Jan Kiszka
    BlockDriverState *bs = bmds->bs;
166 c163b5ca lirans@il.ibm.com
    BlkMigBlock *blk;
167 13f0b67f Jan Kiszka
    int nr_sectors;
168 a55eb92c Jan Kiszka
169 57cce12d Jan Kiszka
    if (bmds->shared_base) {
170 b1d10856 Jan Kiszka
        while (cur_sector < total_sectors &&
171 57cce12d Jan Kiszka
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
172 57cce12d Jan Kiszka
                                  &nr_sectors)) {
173 c163b5ca lirans@il.ibm.com
            cur_sector += nr_sectors;
174 c163b5ca lirans@il.ibm.com
        }
175 c163b5ca lirans@il.ibm.com
    }
176 a55eb92c Jan Kiszka
177 a55eb92c Jan Kiszka
    if (cur_sector >= total_sectors) {
178 82801d8f Jan Kiszka
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
179 c163b5ca lirans@il.ibm.com
        return 1;
180 c163b5ca lirans@il.ibm.com
    }
181 a55eb92c Jan Kiszka
182 82801d8f Jan Kiszka
    bmds->completed_sectors = cur_sector;
183 a55eb92c Jan Kiszka
184 57cce12d Jan Kiszka
    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
185 57cce12d Jan Kiszka
186 6ea44308 Jan Kiszka
    /* we are going to transfer a full block even if it is not allocated */
187 6ea44308 Jan Kiszka
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
188 c163b5ca lirans@il.ibm.com
189 6ea44308 Jan Kiszka
    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
190 57cce12d Jan Kiszka
        nr_sectors = total_sectors - cur_sector;
191 c163b5ca lirans@il.ibm.com
    }
192 a55eb92c Jan Kiszka
193 13f0b67f Jan Kiszka
    blk = qemu_malloc(sizeof(BlkMigBlock));
194 13f0b67f Jan Kiszka
    blk->buf = qemu_malloc(BLOCK_SIZE);
195 13f0b67f Jan Kiszka
    blk->bmds = bmds;
196 13f0b67f Jan Kiszka
    blk->sector = cur_sector;
197 a55eb92c Jan Kiszka
198 e970ec0b Liran Schour
    blk->iov.iov_base = blk->buf;
199 e970ec0b Liran Schour
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
200 e970ec0b Liran Schour
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
201 a55eb92c Jan Kiszka
202 889ae39c Liran Schour
    blk->time = qemu_get_clock_ns(rt_clock);
203 889ae39c Liran Schour
204 e970ec0b Liran Schour
    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
205 e970ec0b Liran Schour
                                nr_sectors, blk_mig_read_cb, blk);
206 e970ec0b Liran Schour
    if (!blk->aiocb) {
207 e970ec0b Liran Schour
        goto error;
208 c163b5ca lirans@il.ibm.com
    }
209 e970ec0b Liran Schour
    block_mig_state.submitted++;
210 d76cac7d Liran Schour
211 13f0b67f Jan Kiszka
    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
212 13f0b67f Jan Kiszka
    bmds->cur_sector = cur_sector + nr_sectors;
213 a55eb92c Jan Kiszka
214 13f0b67f Jan Kiszka
    return (bmds->cur_sector >= total_sectors);
215 4b640365 Jan Kiszka
216 4b640365 Jan Kiszka
error:
217 7184049e Jan Kiszka
    monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
218 4b640365 Jan Kiszka
    qemu_file_set_error(f);
219 4b640365 Jan Kiszka
    qemu_free(blk->buf);
220 4b640365 Jan Kiszka
    qemu_free(blk);
221 4b640365 Jan Kiszka
    return 0;
222 c163b5ca lirans@il.ibm.com
}
223 c163b5ca lirans@il.ibm.com
224 c163b5ca lirans@il.ibm.com
static void set_dirty_tracking(int enable)
225 c163b5ca lirans@il.ibm.com
{
226 c163b5ca lirans@il.ibm.com
    BlkMigDevState *bmds;
227 5e5328be Jan Kiszka
228 5e5328be Jan Kiszka
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
229 a55eb92c Jan Kiszka
        bdrv_set_dirty_tracking(bmds->bs, enable);
230 c163b5ca lirans@il.ibm.com
    }
231 c163b5ca lirans@il.ibm.com
}
232 c163b5ca lirans@il.ibm.com
233 b66460e4 Stefan Hajnoczi
/*
 * bdrv_iterate() callback: register bs for migration.
 *
 * opaque is the Monitor used for the start message.  Read-only devices and
 * devices whose length is not positive are ignored.  For every other
 * device a zeroed BlkMigDevState is set up, its size is added to the
 * global sector total and it is appended to block_mig_state.bmds_list.
 */
static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
{
    Monitor *mon = opaque;
    BlkMigDevState *dev;
    int64_t nsectors;

    if (bdrv_is_read_only(bs)) {
        return;
    }

    nsectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    if (nsectors <= 0) {
        return;
    }

    dev = qemu_mallocz(sizeof(BlkMigDevState));
    dev->bs = bs;
    dev->total_sectors = nsectors;
    dev->shared_base = block_mig_state.shared_base;
    dev->bulk_completed = 0;
    dev->completed_sectors = 0;

    block_mig_state.total_sector_sum += nsectors;

    if (!dev->shared_base) {
        monitor_printf(mon, "Start full migration for %s\n",
                       bs->device_name);
    } else {
        monitor_printf(mon, "Start migration for %s with shared base "
                            "image\n",
                       bs->device_name);
    }

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, dev, entry);
}
266 b66460e4 Stefan Hajnoczi
267 b66460e4 Stefan Hajnoczi
/*
 * Reset the global counters and statistics, then register every eligible
 * block device through init_blk_migration_it().  f is not used here.
 */
static void init_blk_migration(Monitor *mon, QEMUFile *f)
{
    /* request accounting */
    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;

    /* read statistics */
    block_mig_state.reads = 0;
    block_mig_state.total_time = 0;

    /* progress tracking; -1 makes the first progress value get reported */
    block_mig_state.bulk_completed = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.total_sector_sum = 0;

    bdrv_iterate(init_blk_migration_it, mon);
}
280 c163b5ca lirans@il.ibm.com
281 e970ec0b Liran Schour
/*
 * Bulk phase driver: advance the first device that has not finished its
 * bulk phase by one chunk and report progress.
 *
 * Whenever the overall percentage changes, a progress record is written
 * to f and the value is printed on mon.
 *
 * Returns 1 if a device still in the bulk phase was found (work was
 * attempted), 0 if every device has completed it.
 */
static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(mon, f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        /* No device was registered (e.g. all are read-only): there is
           nothing to transfer, so avoid dividing by zero and report done. */
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        monitor_printf(mon, "Completed %d %%\r", progress);
        monitor_flush(mon);
    }

    return ret;
}
313 c163b5ca lirans@il.ibm.com
314 d76cac7d Liran Schour
static void blk_mig_reset_dirty_cursor(void)
315 c163b5ca lirans@il.ibm.com
{
316 c163b5ca lirans@il.ibm.com
    BlkMigDevState *bmds;
317 d76cac7d Liran Schour
318 d76cac7d Liran Schour
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
319 d76cac7d Liran Schour
        bmds->cur_dirty = 0;
320 d76cac7d Liran Schour
    }
321 d76cac7d Liran Schour
}
322 d76cac7d Liran Schour
323 d76cac7d Liran Schour
static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
324 d76cac7d Liran Schour
                                 BlkMigDevState *bmds, int is_async)
325 d76cac7d Liran Schour
{
326 d76cac7d Liran Schour
    BlkMigBlock *blk;
327 d76cac7d Liran Schour
    int64_t total_sectors = bmds->total_sectors;
328 c163b5ca lirans@il.ibm.com
    int64_t sector;
329 d76cac7d Liran Schour
    int nr_sectors;
330 a55eb92c Jan Kiszka
331 d76cac7d Liran Schour
    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
332 d76cac7d Liran Schour
        if (bdrv_get_dirty(bmds->bs, sector)) {
333 575a58d7 Jan Kiszka
334 d76cac7d Liran Schour
            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
335 d76cac7d Liran Schour
                nr_sectors = total_sectors - sector;
336 d76cac7d Liran Schour
            } else {
337 d76cac7d Liran Schour
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
338 d76cac7d Liran Schour
            }
339 d76cac7d Liran Schour
            blk = qemu_malloc(sizeof(BlkMigBlock));
340 d76cac7d Liran Schour
            blk->buf = qemu_malloc(BLOCK_SIZE);
341 d76cac7d Liran Schour
            blk->bmds = bmds;
342 d76cac7d Liran Schour
            blk->sector = sector;
343 d76cac7d Liran Schour
344 889ae39c Liran Schour
            if (is_async) {
345 d76cac7d Liran Schour
                blk->iov.iov_base = blk->buf;
346 d76cac7d Liran Schour
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
347 d76cac7d Liran Schour
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
348 d76cac7d Liran Schour
349 bd0858bb Yoshiaki Tamura
                blk->time = qemu_get_clock_ns(rt_clock);
350 889ae39c Liran Schour
351 d76cac7d Liran Schour
                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
352 d76cac7d Liran Schour
                                            nr_sectors, blk_mig_read_cb, blk);
353 d76cac7d Liran Schour
                if (!blk->aiocb) {
354 d76cac7d Liran Schour
                    goto error;
355 d76cac7d Liran Schour
                }
356 d76cac7d Liran Schour
                block_mig_state.submitted++;
357 d76cac7d Liran Schour
            } else {
358 d76cac7d Liran Schour
                if (bdrv_read(bmds->bs, sector, blk->buf,
359 d76cac7d Liran Schour
                              nr_sectors) < 0) {
360 d76cac7d Liran Schour
                    goto error;
361 c163b5ca lirans@il.ibm.com
                }
362 d76cac7d Liran Schour
                blk_send(f, blk);
363 a55eb92c Jan Kiszka
364 d76cac7d Liran Schour
                qemu_free(blk->buf);
365 d76cac7d Liran Schour
                qemu_free(blk);
366 a55eb92c Jan Kiszka
            }
367 d76cac7d Liran Schour
368 d76cac7d Liran Schour
            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
369 d76cac7d Liran Schour
            break;
370 c163b5ca lirans@il.ibm.com
        }
371 d76cac7d Liran Schour
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
372 d76cac7d Liran Schour
        bmds->cur_dirty = sector;
373 c163b5ca lirans@il.ibm.com
    }
374 575a58d7 Jan Kiszka
375 d76cac7d Liran Schour
    return (bmds->cur_dirty >= bmds->total_sectors);
376 d76cac7d Liran Schour
377 889ae39c Liran Schour
error:
378 d76cac7d Liran Schour
    monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
379 d76cac7d Liran Schour
    qemu_file_set_error(f);
380 d76cac7d Liran Schour
    qemu_free(blk->buf);
381 d76cac7d Liran Schour
    qemu_free(blk);
382 d76cac7d Liran Schour
    return 0;
383 d76cac7d Liran Schour
}
384 d76cac7d Liran Schour
385 d76cac7d Liran Schour
/*
 * Dirty pass driver: walk the devices in list order and stop at the first
 * one for which mig_save_device_dirty() returns 0 (a chunk was handled, or
 * a read failed).
 *
 * Returns 1 in that case, 0 when every device reported that its scan is
 * complete.
 */
static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
{
    BlkMigDevState *dev;

    QSIMPLEQ_FOREACH(dev, &block_mig_state.bmds_list, entry) {
        if (mig_save_device_dirty(mon, f, dev, is_async) == 0) {
            return 1;
        }
    }

    return 0;
}
399 c163b5ca lirans@il.ibm.com
400 c163b5ca lirans@il.ibm.com
/*
 * Send queued blocks whose read has completed, in queue order.
 *
 * Stops when the queue is empty, when qemu_file_rate_limit() reports that
 * the limit is reached, or when the block at the head carries a read
 * error; in the last case f is put into error state and the block stays
 * queued.  Every block that was sent is freed and moved from the
 * "read_done" to the "transferred" count.
 */
static void flush_blks(QEMUFile *f)
{
    BlkMigBlock *head;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    for (;;) {
        head = QSIMPLEQ_FIRST(&block_mig_state.blk_list);
        if (head == NULL) {
            break;
        }
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (head->ret < 0) {
            qemu_file_set_error(f);
            break;
        }

        blk_send(f, head);

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        qemu_free(head->buf);
        qemu_free(head);

        block_mig_state.transferred++;
        block_mig_state.read_done--;
        assert(block_mig_state.read_done >= 0);
    }

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
}
431 c163b5ca lirans@il.ibm.com
432 889ae39c Liran Schour
static int64_t get_remaining_dirty(void)
433 889ae39c Liran Schour
{
434 889ae39c Liran Schour
    BlkMigDevState *bmds;
435 889ae39c Liran Schour
    int64_t dirty = 0;
436 889ae39c Liran Schour
437 889ae39c Liran Schour
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
438 889ae39c Liran Schour
        dirty += bdrv_get_dirty_count(bmds->bs);
439 889ae39c Liran Schour
    }
440 889ae39c Liran Schour
441 889ae39c Liran Schour
    return dirty * BLOCK_SIZE;
442 889ae39c Liran Schour
}
443 889ae39c Liran Schour
444 c163b5ca lirans@il.ibm.com
static int is_stage2_completed(void)
445 c163b5ca lirans@il.ibm.com
{
446 889ae39c Liran Schour
    int64_t remaining_dirty;
447 889ae39c Liran Schour
    long double bwidth;
448 889ae39c Liran Schour
449 889ae39c Liran Schour
    if (block_mig_state.bulk_completed == 1) {
450 889ae39c Liran Schour
451 889ae39c Liran Schour
        remaining_dirty = get_remaining_dirty();
452 bd0858bb Yoshiaki Tamura
        if (remaining_dirty == 0) {
453 bd0858bb Yoshiaki Tamura
            return 1;
454 bd0858bb Yoshiaki Tamura
        }
455 889ae39c Liran Schour
456 bd0858bb Yoshiaki Tamura
        bwidth = compute_read_bwidth();
457 889ae39c Liran Schour
458 bd0858bb Yoshiaki Tamura
        if ((remaining_dirty / bwidth) <=
459 889ae39c Liran Schour
            migrate_max_downtime()) {
460 889ae39c Liran Schour
            /* finish stage2 because we think that we can finish remaing work
461 889ae39c Liran Schour
               below max_downtime */
462 889ae39c Liran Schour
463 889ae39c Liran Schour
            return 1;
464 889ae39c Liran Schour
        }
465 889ae39c Liran Schour
    }
466 889ae39c Liran Schour
467 889ae39c Liran Schour
    return 0;
468 c163b5ca lirans@il.ibm.com
}
469 c163b5ca lirans@il.ibm.com
470 7184049e Jan Kiszka
/*
 * Tear down the migration state: switch dirty tracking off on every
 * registered device, free the device list and all blocks still queued for
 * sending, and terminate the progress line on mon.
 */
static void blk_mig_cleanup(Monitor *mon)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    /* Must happen before the device list is emptied: set_dirty_tracking()
       iterates that list, so calling it afterwards would do nothing and
       leave dirty tracking enabled. */
    set_dirty_tracking(0);

    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        qemu_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        qemu_free(blk->buf);
        qemu_free(blk);
    }

    monitor_printf(mon, "\n");
}
490 4ec7fcc7 Jan Kiszka
491 f327aa0c Jan Kiszka
/*
 * Live-save handler registered in blk_mig_init().
 *
 * stage < 0  : migration was cancelled; clean up and return 0.
 * stage == 1 : set up the device list and enable dirty tracking.
 * stage == 2 : submit reads (bulk phase first, dirty chunks afterwards)
 *              while the data in flight stays below the file's rate limit,
 *              then send whatever has completed.
 * stage == 3 : synchronously send all remaining dirty chunks, clean up and
 *              report 100 % progress.
 *
 * Every pass that gets that far ends with an EOS record.  If block
 * migration is not enabled only the EOS record is written and 1 is
 * returned.  Otherwise the result is 1 only in stage 2 when
 * is_stage2_completed() says the stage may end; on a file error the state
 * is cleaned up and 0 is returned.
 */
static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
{
    DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
            stage, block_mig_state.submitted, block_mig_state.transferred);

    if (stage < 0) {
        blk_mig_cleanup(mon);
        return 0;
    }

    if (block_mig_state.blk_enable != 1) {
        /* no need to migrate storage */
        qemu_put_be64(f, BLK_MIG_FLAG_EOS);
        return 1;
    }

    if (stage == 1) {
        init_blk_migration(mon, f);

        /* start track dirty blocks */
        set_dirty_tracking(1);
    }

    flush_blks(f);

    if (qemu_file_has_error(f)) {
        blk_mig_cleanup(mon);
        return 0;
    }

    blk_mig_reset_dirty_cursor();

    if (stage == 2) {
        /* control the rate of transfer */
        while ((block_mig_state.submitted +
                block_mig_state.read_done) * BLOCK_SIZE <
               qemu_file_get_rate_limit(f)) {
            if (block_mig_state.bulk_completed == 0) {
                /* first finish the bulk phase */
                if (blk_mig_save_bulked_block(mon, f) == 0) {
                    /* finished saving bulk on all devices */
                    block_mig_state.bulk_completed = 1;
                }
            } else {
                if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
                    /* no more dirty blocks */
                    break;
                }
            }
            /* A failed read is reported by the helpers exactly like "work
               was done" and does not advance any cursor, so without this
               check the same sector would be retried forever. */
            if (qemu_file_has_error(f)) {
                break;
            }
        }

        flush_blks(f);

        if (qemu_file_has_error(f)) {
            blk_mig_cleanup(mon);
            return 0;
        }
    }

    if (stage == 3) {
        /* we know for sure that save bulk is completed and
           all async read completed */
        assert(block_mig_state.submitted == 0);

        while (blk_mig_save_dirty_block(mon, f, 0) != 0) {
            /* stop on a read error instead of retrying the failing
               sector endlessly; the error is handled below */
            if (qemu_file_has_error(f)) {
                break;
            }
        }
        blk_mig_cleanup(mon);

        /* report completion */
        qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

        if (qemu_file_has_error(f)) {
            return 0;
        }

        monitor_printf(mon, "Block migration completed\n");
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ((stage == 2) && is_stage2_completed());
}
572 c163b5ca lirans@il.ibm.com
573 c163b5ca lirans@il.ibm.com
/*
 * Load handler registered in blk_mig_init(): consume records from f until
 * an EOS record is seen.
 *
 * Device-block records are written to the named device, progress records
 * are printed on stdout, anything else that is not EOS is rejected.
 *
 * Returns 0 on success, -EINVAL for an unknown device, a device whose
 * length cannot be determined, a sector outside the device or unknown
 * flags, -EIO if f is in error state, or the negative result of
 * bdrv_write().
 */
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    int64_t total_sectors = 0;
    int nr_sectors;
    BlockDriverState *bs;
    BlockDriverState *bs_prev = NULL;
    uint8_t *buf;

    do {
        addr = qemu_get_be64(f);

        /* low bits: record type; remaining bits: sector or percentage */
        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            int ret;
            /* get device name; len is at most 255, so the terminator
               always fits into device_name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            bs = bdrv_find(device_name);
            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            /* look the length up only when the device changes */
            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    fprintf(stderr,
                            "Error getting length of block device %s\n",
                            device_name);
                    return -EINVAL;
                }
            }

            if (addr < 0 || addr >= total_sectors) {
                fprintf(stderr, "Error sector %" PRId64
                        " is outside block device %s\n", addr, device_name);
                return -EINVAL;
            }

            /* The sender always transmits BLOCK_SIZE bytes, but the last
               chunk of a device may be shorter; never write past the end
               of the device. */
            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            buf = qemu_malloc(BLOCK_SIZE);

            qemu_get_buffer(f, buf, BLOCK_SIZE);
            ret = bdrv_write(bs, addr, buf, nr_sectors);

            qemu_free(buf);
            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown flags\n");
            return -EINVAL;
        }
        if (qemu_file_has_error(f)) {
            return -EIO;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}
630 c163b5ca lirans@il.ibm.com
631 c163b5ca lirans@il.ibm.com
static void block_set_params(int blk_enable, int shared_base, void *opaque)
632 c163b5ca lirans@il.ibm.com
{
633 d11ecd3d Jan Kiszka
    block_mig_state.blk_enable = blk_enable;
634 d11ecd3d Jan Kiszka
    block_mig_state.shared_base = shared_base;
635 a55eb92c Jan Kiszka
636 c163b5ca lirans@il.ibm.com
    /* shared base means that blk_enable = 1 */
637 d11ecd3d Jan Kiszka
    block_mig_state.blk_enable |= shared_base;
638 c163b5ca lirans@il.ibm.com
}
639 c163b5ca lirans@il.ibm.com
640 c163b5ca lirans@il.ibm.com
void blk_mig_init(void)
641 a55eb92c Jan Kiszka
{
642 5e5328be Jan Kiszka
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
643 5e5328be Jan Kiszka
    QSIMPLEQ_INIT(&block_mig_state.blk_list);
644 5e5328be Jan Kiszka
645 0be71e32 Alex Williamson
    register_savevm_live(NULL, "block", 0, 1, block_set_params,
646 0be71e32 Alex Williamson
                         block_save_live, NULL, block_load, &block_mig_state);
647 c163b5ca lirans@il.ibm.com
}