/*
 * QEMU live block migration
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Liran Schour   <lirans@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "block/block_int.h"
#include "hw/hw.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
#include "migration/block.h"
#include "migration/migration.h"
#include "sysemu/blockdev.h"
#include <assert.h>

#define BLOCK_SIZE                       (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)

#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

#define MAX_IS_ALLOCATED_SEARCH 65536

//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct BlkMigDevState {
    /* Written during setup phase.  Can be read without a lock.  */
    BlockDriverState *bs;
    int shared_base;
    int64_t total_sectors;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;

    /* Only used by migration thread.  Does not need a lock.  */
    int bulk_completed;
    int64_t cur_sector;
    int64_t cur_dirty;

    /* Protected by block migration lock.  */
    unsigned long *aio_bitmap;
    int64_t completed_sectors;
} BlkMigDevState;

typedef struct BlkMigBlock {
    /* Only used by migration thread.  */
    uint8_t *buf;
    BlkMigDevState *bmds;
    int64_t sector;
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockDriverAIOCB *aiocb;

    /* Protected by block migration lock.  */
    int ret;
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;

typedef struct BlkMigState {
    /* Written during setup phase.  Can be read without a lock.  */
    int blk_enable;
    int shared_base;
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    int64_t total_sector_sum;

    /* Protected by lock.  */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
    int submitted;
    int read_done;

    /* Only used by migration thread.  Does not need a lock.  */
    int transferred;
    int prev_progress;
    int bulk_completed;

    /* Lock must be taken _inside_ the iothread lock.  */
    QemuMutex lock;
} BlkMigState;

static BlkMigState block_mig_state;

static void blk_mig_lock(void)
{
    qemu_mutex_lock(&block_mig_state.lock);
}

static void blk_mig_unlock(void)
{
    qemu_mutex_unlock(&block_mig_state.lock);
}

/* Must run outside of the iothread lock during the bulk phase,
 * or the VM will stall.
 */

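/* Write one chunk on the wire: a 64-bit word holding
 * (sector << BDRV_SECTOR_BITS) plus the flag bits, a one-byte device-name
 * length, the device name itself, and finally BLOCK_SIZE bytes of data.
 */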
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
    int len;

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | BLK_MIG_FLAG_DEVICE_BLOCK);

    /* device name */
    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}

int blk_mig_active(void)
{
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
}

uint64_t blk_mig_bytes_transferred(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    blk_mig_lock();
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->completed_sectors;
    }
    blk_mig_unlock();
    return sum << BDRV_SECTOR_BITS;
}

uint64_t blk_mig_bytes_remaining(void)
{
    return blk_mig_bytes_total() - blk_mig_bytes_transferred();
}

uint64_t blk_mig_bytes_total(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->total_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}


/* Called with migration lock held.  */

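/* Return non-zero if an async read is still in flight for the chunk
 * containing @sector, zero otherwise (or if the sector lies past the end
 * of the device).
 */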
static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if ((sector << BDRV_SECTOR_BITS) < bdrv_getlength(bmds->bs)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

/* Called with migration lock held.  */

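/* Set or clear the in-flight bit of every chunk overlapped by the
 * [sector_num, sector_num + nb_sectors) range.
 */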
static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                             int nb_sectors, int set)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bmds->aio_bitmap[idx];
        if (set) {
            val |= 1UL << bit;
        } else {
            val &= ~(1UL << bit);
        }
        bmds->aio_bitmap[idx] = val;
    }
}

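/* Allocate the per-device in-flight bitmap: one bit per
 * BDRV_SECTORS_PER_DIRTY_CHUNK-sized chunk, rounded up to whole bytes.
 */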
static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
    BlockDriverState *bs = bmds->bs;
    int64_t bitmap_size;

    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
            BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = g_malloc0(bitmap_size);
}

/* Never hold migration lock when yielding to the main loop!  */

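/* AIO completion callback: record the read result, move the block onto the
 * list of blocks ready to be sent, and clear its in-flight bit.
 */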
static void blk_mig_read_cb(void *opaque, int ret)
{
    BlkMigBlock *blk = opaque;

    blk_mig_lock();
    blk->ret = ret;

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);

    block_mig_state.submitted--;
    block_mig_state.read_done++;
    assert(block_mig_state.submitted >= 0);
    blk_mig_unlock();
}

/* Called with no lock taken.  */

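/* Submit an async read for the next chunk of the bulk phase on one device.
 * Returns 1 when the device's bulk phase is finished, 0 otherwise.
 */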
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t cur_sector = bmds->cur_sector;
    BlockDriverState *bs = bmds->bs;
    BlkMigBlock *blk;
    int nr_sectors;

    if (bmds->shared_base) {
        qemu_mutex_lock_iothread();
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                  &nr_sectors)) {
            cur_sector += nr_sectors;
        }
        qemu_mutex_unlock_iothread();
    }

    if (cur_sector >= total_sectors) {
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
        return 1;
    }

    bmds->completed_sectors = cur_sector;

    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);

    /* we are going to transfer a full block even if it is not allocated */
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        nr_sectors = total_sectors - cur_sector;
    }

    blk = g_malloc(sizeof(BlkMigBlock));
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
    blk->nr_sectors = nr_sectors;

    blk->iov.iov_base = blk->buf;
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

    blk_mig_lock();
    block_mig_state.submitted++;
    blk_mig_unlock();

    qemu_mutex_lock_iothread();
    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);

    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
    qemu_mutex_unlock_iothread();

    bmds->cur_sector = cur_sector + nr_sectors;
    return (bmds->cur_sector >= total_sectors);
}

/* Called with iothread lock taken.  */

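/* Enable or disable dirty tracking on every device registered for migration. */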
static void set_dirty_tracking(int enable)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
    }
}

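/* bdrv_iterate() callback: register one writable device for migration and
 * take a reference on its drive so it cannot go away while migration runs.
 */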
static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
{
    BlkMigDevState *bmds;
    int64_t sectors;

    if (!bdrv_is_read_only(bs)) {
        sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
        if (sectors <= 0) {
            return;
        }

        bmds = g_malloc0(sizeof(BlkMigDevState));
        bmds->bs = bs;
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
        bmds->completed_sectors = 0;
        bmds->shared_base = block_mig_state.shared_base;
        alloc_aio_bitmap(bmds);
        drive_get_ref(drive_get_by_blockdev(bs));
        bdrv_set_in_use(bs, 1);

        block_mig_state.total_sector_sum += sectors;

        if (bmds->shared_base) {
            DPRINTF("Start migration for %s with shared base image\n",
                    bs->device_name);
        } else {
            DPRINTF("Start full migration for %s\n", bs->device_name);
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
}

static void init_blk_migration(QEMUFile *f)
{
    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;

    bdrv_iterate(init_blk_migration_it, NULL);
}

/* Called with no lock taken.  */

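/* Save one bulk chunk from the first device that has not finished its bulk
 * phase yet, and emit a progress marker when the percentage changes.
 * Returns 0 once every device has completed its bulk phase.
 */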
static int blk_mig_save_bulked_block(QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        DPRINTF("Completed %d %%\r", progress);
    }

    return ret;
}

static void blk_mig_reset_dirty_cursor(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->cur_dirty = 0;
    }
}

/* Called with iothread lock taken.  */

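/* Scan forward from the device's dirty cursor for the next dirty chunk and
 * transfer it, either via an async read (is_async) or synchronously with an
 * immediate blk_send().  Returns 1 when the cursor has reached the end of
 * the device, 0 otherwise, or a negative value on a read error.
 */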
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
{
    BlkMigBlock *blk;
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector;
    int nr_sectors;
    int ret = -EIO;

    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
        blk_mig_lock();
        if (bmds_aio_inflight(bmds, sector)) {
            blk_mig_unlock();
            bdrv_drain_all();
        } else {
            blk_mig_unlock();
        }
        if (bdrv_get_dirty(bmds->bs, sector)) {

            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
            blk = g_malloc(sizeof(BlkMigBlock));
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
            blk->nr_sectors = nr_sectors;

            if (is_async) {
                blk->iov.iov_base = blk->buf;
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                            nr_sectors, blk_mig_read_cb, blk);

                blk_mig_lock();
                block_mig_state.submitted++;
                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
                blk_mig_unlock();
            } else {
                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
                if (ret < 0) {
                    goto error;
                }
                blk_send(f, blk);

                g_free(blk->buf);
                g_free(blk);
            }

            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        bmds->cur_dirty = sector;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);

error:
    DPRINTF("Error reading sector %" PRId64 "\n", sector);
    g_free(blk->buf);
    g_free(blk);
    return ret;
}

/* Called with iothread lock taken.
 *
 * return value:
 * 0: too much data for max_downtime
 * 1: remaining data is small enough for max_downtime
 */
static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
{
    BlkMigDevState *bmds;
    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        ret = mig_save_device_dirty(f, bmds, is_async);
        if (ret <= 0) {
            break;
        }
    }

    return ret;
}

/* Called with no locks taken.  */

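/* Send the blocks whose async reads have completed, stopping early if the
 * rate limit is hit or a read reported an error.
 */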
static int flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;
    int ret = 0;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    blk_mig_lock();
    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (blk->ret < 0) {
            ret = blk->ret;
            break;
        }

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        blk_mig_unlock();
        blk_send(f, blk);
        blk_mig_lock();

        g_free(blk->buf);
        g_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }
    blk_mig_unlock();

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
    return ret;
}

/* Called with iothread lock taken.  */

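/* Sum of dirty bytes still to be transferred, across all devices. */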
static int64_t get_remaining_dirty(void)
{
    BlkMigDevState *bmds;
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        dirty += bdrv_get_dirty_count(bmds->bs);
    }

    return dirty << BDRV_SECTOR_BITS;
}

/* Called with iothread lock taken.  */

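/* Stop dirty tracking, drop the device references taken at setup, and free
 * all per-device and per-block state.
 */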
static void blk_mig_cleanup(void)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    bdrv_drain_all();

    set_dirty_tracking(0);

    blk_mig_lock();
    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        bdrv_set_in_use(bmds->bs, 0);
        drive_put_ref(drive_get_by_blockdev(bmds->bs));
        g_free(bmds->aio_bitmap);
        g_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);
    }
    blk_mig_unlock();
}

static void block_migration_cancel(void *opaque)
{
    blk_mig_cleanup();
}

static int block_save_setup(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live setup submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    qemu_mutex_lock_iothread();
    init_blk_migration(f);

    /* start tracking dirty blocks */
    set_dirty_tracking(1);
    qemu_mutex_unlock_iothread();

    ret = flush_blks(f);
    blk_mig_reset_dirty_cursor();
    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ret;
}

static int block_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int64_t last_ftell = qemu_ftell(f);

    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* control the rate of transfer */
    blk_mig_lock();
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
           qemu_file_get_rate_limit(f)) {
        blk_mig_unlock();
        if (block_mig_state.bulk_completed == 0) {
            /* first finish the bulk phase */
            if (blk_mig_save_bulked_block(f) == 0) {
                /* finished saving bulk on all devices */
                block_mig_state.bulk_completed = 1;
            }
            ret = 0;
        } else {
            /* Always called with iothread lock taken for
             * simplicity, block_save_complete also calls it.
             */
            qemu_mutex_lock_iothread();
            ret = blk_mig_save_dirty_block(f, 1);
            qemu_mutex_unlock_iothread();
        }
        if (ret < 0) {
            return ret;
        }
        blk_mig_lock();
        if (ret != 0) {
            /* no more dirty blocks */
            break;
        }
    }
    blk_mig_unlock();

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
    return qemu_ftell(f) - last_ftell;
}

/* Called with iothread lock taken.  */

static int block_save_complete(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live complete submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* we know for sure that save bulk is completed and
       all async reads have completed */
    blk_mig_lock();
    assert(block_mig_state.submitted == 0);
    blk_mig_unlock();

    do {
        ret = blk_mig_save_dirty_block(f, 0);
        if (ret < 0) {
            return ret;
        }
    } while (ret == 0);

    /* report completion */
    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    DPRINTF("Block migration completed\n");

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    blk_mig_cleanup();
    return 0;
}

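/* Estimate of bytes still to be sent: dirty data not yet read, plus blocks
 * whose reads are submitted or completed but not yet put on the wire.
 */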
static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    /* Estimate pending number of bytes to send */
    uint64_t pending;

    qemu_mutex_lock_iothread();
    blk_mig_lock();
    pending = get_remaining_dirty() +
                       block_mig_state.submitted * BLOCK_SIZE +
                       block_mig_state.read_done * BLOCK_SIZE;

    /* Report at least one block pending during bulk phase */
    if (pending == 0 && !block_mig_state.bulk_completed) {
        pending = BLOCK_SIZE;
    }
    blk_mig_unlock();
    qemu_mutex_unlock_iothread();

    DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
    return pending;
}

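/* Destination side: consume the stream produced by blk_send(), writing each
 * chunk into the block device named in the stream, until the EOS flag is
 * seen.  Progress markers are printed to stdout.
 */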
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
    int ret;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            bs = bdrv_find(device_name);
            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            buf = g_malloc(BLOCK_SIZE);

            qemu_get_buffer(f, buf, BLOCK_SIZE);
            ret = bdrv_write(bs, addr, buf, nr_sectors);

            g_free(buf);
            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
            return -EINVAL;
        }
        ret = qemu_file_get_error(f);
        if (ret != 0) {
            return ret;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}

static void block_set_params(const MigrationParams *params, void *opaque)
{
    block_mig_state.blk_enable = params->blk;
    block_mig_state.shared_base = params->shared;

    /* shared base means that blk_enable = 1 */
    block_mig_state.blk_enable |= params->shared;
}

static bool block_is_active(void *opaque)
{
    return block_mig_state.blk_enable == 1;
}

SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
    .save_live_complete = block_save_complete,
    .save_live_pending = block_save_pending,
    .load_state = block_load,
    .cancel = block_migration_cancel,
    .is_active = block_is_active,
};

void blk_mig_init(void)
{
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
    QSIMPLEQ_INIT(&block_mig_state.blk_list);
    qemu_mutex_init(&block_mig_state.lock);

    register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
                         &block_mig_state);
}