/*
 * QEMU live block migration
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Liran Schour   <lirans@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
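
/*
 * Overview: the source side walks every writable block device in two
 * phases.  The bulk phase streams each device start to finish in
 * BLOCK_SIZE chunks; the dirty phase then re-sends chunks the guest
 * wrote in the meantime, until is_stage2_completed() decides the
 * remainder fits within the allowed downtime.
 *
 * Each device-block record on the wire is: an 8-byte big-endian word
 * holding the sector address shifted by BDRV_SECTOR_BITS and OR'd with
 * BLK_MIG_FLAG_* bits, a one-byte device-name length, the device name,
 * and BLOCK_SIZE bytes of data.  BLK_MIG_FLAG_PROGRESS records carry a
 * percentage instead of an address, and BLK_MIG_FLAG_EOS ends the
 * block section for one stage.
 */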

#include "qemu-common.h"
#include "block/block_int.h"
#include "hw/hw.h"
#include "qemu-queue.h"
#include "qemu-timer.h"
#include "block-migration.h"
#include "migration.h"
#include "blockdev.h"
#include <assert.h>
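
/*
 * The migration unit is one dirty-bitmap chunk.  Assuming the usual
 * 512-byte sectors (BDRV_SECTOR_BITS == 9) and the 2048-sector dirty
 * chunk defined in block_int.h at this time, BLOCK_SIZE works out to
 * 2048 << 9 == 1 MiB; the actual value is whatever the headers define.
 */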
#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)

#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

#define MAX_IS_ALLOCATED_SEARCH 65536

//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
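
/*
 * Per-device migration state.  cur_sector is the bulk-phase cursor and
 * cur_dirty the dirty-phase cursor; aio_bitmap keeps one bit per
 * BDRV_SECTORS_PER_DIRTY_CHUNK-sector chunk, marking chunks that still
 * have an asynchronous read in flight.
 */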
typedef struct BlkMigDevState {
    BlockDriverState *bs;
    int bulk_completed;
    int shared_base;
    int64_t cur_sector;
    int64_t cur_dirty;
    int64_t completed_sectors;
    int64_t total_sectors;
    int64_t dirty;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;
    unsigned long *aio_bitmap;
} BlkMigDevState;
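
/*
 * One BLOCK_SIZE chunk in flight: the read buffer, the owning device
 * state, and the iovec/AIO bookkeeping used until blk_send() writes it
 * to the migration stream.
 */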
typedef struct BlkMigBlock {
    uint8_t *buf;
    BlkMigDevState *bmds;
    int64_t sector;
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockDriverAIOCB *aiocb;
    int ret;
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;

typedef struct BlkMigState {
    int blk_enable;
    int shared_base;
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
    int submitted;
    int read_done;
    int transferred;
    int64_t total_sector_sum;
    int prev_progress;
    int bulk_completed;
    long double total_time;
    long double prev_time_offset;
    int reads;
} BlkMigState;

static BlkMigState block_mig_state;
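
/* Emit one chunk: tagged sector address, device name, then the payload. */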
static void blk_send(QEMUFile *f, BlkMigBlock *blk)
{
    int len;

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | BLK_MIG_FLAG_DEVICE_BLOCK);

    /* device name */
    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}

int blk_mig_active(void)
{
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
}

uint64_t blk_mig_bytes_transferred(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->completed_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}

uint64_t blk_mig_bytes_remaining(void)
{
    return blk_mig_bytes_total() - blk_mig_bytes_transferred();
}

uint64_t blk_mig_bytes_total(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->total_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}
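
/*
 * Mean read bandwidth so far, in bytes per nanosecond: total_time
 * accumulates qemu_get_clock_ns() deltas in blk_mig_read_cb(), so
 * reads / total_time is completed reads per nanosecond, and scaling by
 * BLOCK_SIZE gives bytes per nanosecond.  is_stage2_completed() below
 * divides the remaining dirty byte count by this to estimate downtime.
 */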
static inline long double compute_read_bwidth(void)
{
    assert(block_mig_state.total_time != 0);
    return (block_mig_state.reads / block_mig_state.total_time) * BLOCK_SIZE;
}
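
/*
 * aio_bitmap indexing goes sector -> chunk -> word and bit.  For
 * example, assuming 2048-sector chunks and 64-bit longs, sector
 * 1048576 is chunk 512, stored in word 512 / 64 == 8 at bit
 * 512 % 64 == 0.
 */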
static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                                  int nb_sectors, int set)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bmds->aio_bitmap[idx];
        if (set) {
            val |= 1UL << bit;
        } else {
            val &= ~(1UL << bit);
        }
        bmds->aio_bitmap[idx] = val;
    }
}
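
/*
 * The in-flight bitmap is sized at one bit per dirty chunk, rounded
 * up: length-in-sectors plus (chunk * 8 - 1), divided by chunk * 8,
 * gives the byte count handed to g_malloc0().
 */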
static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
    BlockDriverState *bs = bmds->bs;
    int64_t bitmap_size;

    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = g_malloc0(bitmap_size);
}
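
/*
 * AIO completion callback: record the read for the bandwidth estimate,
 * queue the block so flush_blks() can send it, and clear its in-flight
 * bit.
 */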
static void blk_mig_read_cb(void *opaque, int ret)
{
    long double curr_time = qemu_get_clock_ns(rt_clock);
    BlkMigBlock *blk = opaque;

    blk->ret = ret;

    block_mig_state.reads++;
    block_mig_state.total_time += (curr_time - block_mig_state.prev_time_offset);
    block_mig_state.prev_time_offset = curr_time;

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);

    block_mig_state.submitted--;
    block_mig_state.read_done++;
    assert(block_mig_state.submitted >= 0);
}
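
/*
 * Bulk phase, one chunk per call: with a shared base image, first skip
 * extents that are unallocated in the local image, then align the
 * cursor down to a chunk boundary and issue one asynchronous
 * BLOCK_SIZE read.  Returns 1 once the whole device has been walked.
 */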
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t cur_sector = bmds->cur_sector;
    BlockDriverState *bs = bmds->bs;
    BlkMigBlock *blk;
    int nr_sectors;

    if (bmds->shared_base) {
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                  &nr_sectors)) {
            cur_sector += nr_sectors;
        }
    }

    if (cur_sector >= total_sectors) {
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
        return 1;
    }

    bmds->completed_sectors = cur_sector;

    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);

    /* we are going to transfer a full block even if it is not allocated */
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        nr_sectors = total_sectors - cur_sector;
    }

    blk = g_malloc(sizeof(BlkMigBlock));
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
    blk->nr_sectors = nr_sectors;

    blk->iov.iov_base = blk->buf;
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

    if (block_mig_state.submitted == 0) {
        block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
    }

    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);
    block_mig_state.submitted++;

    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
    bmds->cur_sector = cur_sector + nr_sectors;

    return (bmds->cur_sector >= total_sectors);
}

static void set_dirty_tracking(int enable)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bdrv_set_dirty_tracking(bmds->bs, enable);
    }
}

static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
{
    BlkMigDevState *bmds;
    int64_t sectors;

    if (!bdrv_is_read_only(bs)) {
        sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        if (sectors <= 0) {
            return;
        }

        bmds = g_malloc0(sizeof(BlkMigDevState));
        bmds->bs = bs;
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
        bmds->completed_sectors = 0;
        bmds->shared_base = block_mig_state.shared_base;
        alloc_aio_bitmap(bmds);
        drive_get_ref(drive_get_by_blockdev(bs));
        bdrv_set_in_use(bs, 1);

        block_mig_state.total_sector_sum += sectors;

        if (bmds->shared_base) {
            DPRINTF("Start migration for %s with shared base image\n",
                    bs->device_name);
        } else {
            DPRINTF("Start full migration for %s\n", bs->device_name);
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
}

static void init_blk_migration(QEMUFile *f)
{
    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;
    block_mig_state.total_time = 0;
    block_mig_state.reads = 0;

    bdrv_iterate(init_blk_migration_it, NULL);
}
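
/*
 * Drive the bulk phase on the first device that still has work, and
 * emit a BLK_MIG_FLAG_PROGRESS record whenever the overall percentage
 * changes.  Returns 0 once every device's bulk section is complete.
 */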
static int blk_mig_save_bulked_block(QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        DPRINTF("Completed %d %%\r", progress);
    }

    return ret;
}

static void blk_mig_reset_dirty_cursor(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->cur_dirty = 0;
    }
}
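
/*
 * Dirty phase, at most one chunk per call: scan from cur_dirty for a
 * chunk the guest has re-written.  If the bulk phase still has an AIO
 * read in flight for that chunk, drain first so the two cannot race.
 * The chunk is read asynchronously during the iterative stage, or
 * synchronously (is_async == 0) during the final stage, and its dirty
 * bit is cleared.
 */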
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
{
    BlkMigBlock *blk;
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector;
    int nr_sectors;
    int ret = -EIO;

    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
        if (bmds_aio_inflight(bmds, sector)) {
            bdrv_drain_all();
        }
        if (bdrv_get_dirty(bmds->bs, sector)) {

            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
            blk = g_malloc(sizeof(BlkMigBlock));
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
            blk->nr_sectors = nr_sectors;

            if (is_async) {
                blk->iov.iov_base = blk->buf;
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

                if (block_mig_state.submitted == 0) {
                    block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
                }

                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                            nr_sectors, blk_mig_read_cb, blk);
                block_mig_state.submitted++;
                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
            } else {
                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
                if (ret < 0) {
                    goto error;
                }
                blk_send(f, blk);

                g_free(blk->buf);
                g_free(blk);
            }

            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        bmds->cur_dirty = sector;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);

error:
    DPRINTF("Error reading sector %" PRId64 "\n", sector);
    g_free(blk->buf);
    g_free(blk);
    return ret;
}

/* Return value:
 * 0: too much data for max_downtime
 * 1: little enough data for max_downtime
 * <0: error while saving a dirty block
 */
static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
{
    BlkMigDevState *bmds;
    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        ret = mig_save_device_dirty(f, bmds, is_async);
        if (ret <= 0) {
            break;
        }
    }

    return ret;
}

static int flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;
    int ret = 0;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (blk->ret < 0) {
            ret = blk->ret;
            break;
        }
        blk_send(f, blk);

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
    return ret;
}
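
/*
 * Bytes still marked dirty across all devices; bdrv_get_dirty_count()
 * counts dirty chunks of BDRV_SECTORS_PER_DIRTY_CHUNK sectors, hence
 * the scaling by BLOCK_SIZE.
 */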
static int64_t get_remaining_dirty(void)
{
    BlkMigDevState *bmds;
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        dirty += bdrv_get_dirty_count(bmds->bs);
    }

    return dirty * BLOCK_SIZE;
}
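
/*
 * Stage 2 may end once the bulk phase is done and the estimated time
 * to transfer the remaining dirty data, remaining_dirty / bwidth
 * (bytes divided by bytes-per-nanosecond), drops to
 * migrate_max_downtime() or below.
 */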
static int is_stage2_completed(void)
{
    int64_t remaining_dirty;
    long double bwidth;

    if (block_mig_state.bulk_completed == 1) {
        remaining_dirty = get_remaining_dirty();
        if (remaining_dirty == 0) {
            return 1;
        }

        bwidth = compute_read_bwidth();

        if ((remaining_dirty / bwidth) <=
            migrate_max_downtime()) {
            /* finish stage2 because we think that we can finish remaining work
               below max_downtime */
            return 1;
        }
    }

    return 0;
}

static void blk_mig_cleanup(void)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    bdrv_drain_all();

    set_dirty_tracking(0);

    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        bdrv_set_in_use(bmds->bs, 0);
        drive_put_ref(drive_get_by_blockdev(bmds->bs));
        g_free(bmds->aio_bitmap);
        g_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);
    }
}

static void block_migration_cancel(void *opaque)
{
    blk_mig_cleanup();
}

static int block_save_setup(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live setup submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    init_blk_migration(f);

    /* start tracking dirty blocks */
    set_dirty_tracking(1);

    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return 0;
}

static int block_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* control the rate of transfer */
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
           qemu_file_get_rate_limit(f)) {
        if (block_mig_state.bulk_completed == 0) {
            /* first finish the bulk phase */
            if (blk_mig_save_bulked_block(f) == 0) {
                /* finished saving bulk on all devices */
                block_mig_state.bulk_completed = 1;
            }
        } else {
            ret = blk_mig_save_dirty_block(f, 1);
            if (ret != 0) {
                /* no more dirty blocks, or an error occurred */
                break;
            }
        }
    }
    /* a positive value here only means there was nothing left to send */
    if (ret < 0) {
        blk_mig_cleanup();
        return ret;
    }

    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return is_stage2_completed();
}

static int block_save_complete(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live complete submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        blk_mig_cleanup();
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* we know for sure that save bulk is completed and
       all async reads completed */
    assert(block_mig_state.submitted == 0);

    do {
        ret = blk_mig_save_dirty_block(f, 0);
    } while (ret == 0);

    blk_mig_cleanup();
    /* ret == 1 here means all dirty blocks have been sent */
    if (ret < 0) {
        return ret;
    }
    /* report completion */
    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    DPRINTF("Block migration completed\n");

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return 0;
}
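
/*
 * Destination side: consume records until BLK_MIG_FLAG_EOS.  The flag
 * bits travel in the low, sub-sector bits of the 8-byte header (below
 * BDRV_SECTOR_MASK); the rest of the word is the sector address, or
 * the completion percentage for BLK_MIG_FLAG_PROGRESS records.
 */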
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
    int ret;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            bs = bdrv_find(device_name);
            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            buf = g_malloc(BLOCK_SIZE);

            qemu_get_buffer(f, buf, BLOCK_SIZE);
            ret = bdrv_write(bs, addr, buf, nr_sectors);

            g_free(buf);
            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown flags\n");
            return -EINVAL;
        }
        ret = qemu_file_get_error(f);
        if (ret != 0) {
            return ret;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}

static void block_set_params(const MigrationParams *params, void *opaque)
{
    block_mig_state.blk_enable = params->blk;
    block_mig_state.shared_base = params->shared;

    /* shared base means that blk_enable = 1 */
    block_mig_state.blk_enable |= params->shared;
}

static bool block_is_active(void *opaque)
{
    return block_mig_state.blk_enable == 1;
}

SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
    .save_live_complete = block_save_complete,
    .load_state = block_load,
    .cancel = block_migration_cancel,
    .is_active = block_is_active,
};

void blk_mig_init(void)
{
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
    QSIMPLEQ_INIT(&block_mig_state.blk_list);

    register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
                         &block_mig_state);
}