/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <stdint.h>
#include <stdarg.h>
#include <stdlib.h>
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif
#include "config.h"
#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
#include "audio/audio.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
#include "hw/i386/smbios.h"
#include "exec/address-spaces.h"
#include "hw/audio/pcspk.h"
#include "migration/page_cache.h"
#include "qemu/config-file.h"
#include "qmp-commands.h"
#include "trace.h"
#include "exec/cpu-all.h"
#include "exec/ram_addr.h"
#include "hw/acpi/acpi.h"
#include "qemu/host-utils.h"

#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#ifdef TARGET_SPARC
int graphic_width = 1024;
int graphic_height = 768;
int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 32;
#endif

#if defined(TARGET_ALPHA)
#define QEMU_ARCH QEMU_ARCH_ALPHA
#elif defined(TARGET_ARM)
#define QEMU_ARCH QEMU_ARCH_ARM
#elif defined(TARGET_CRIS)
#define QEMU_ARCH QEMU_ARCH_CRIS
#elif defined(TARGET_I386)
#define QEMU_ARCH QEMU_ARCH_I386
#elif defined(TARGET_M68K)
#define QEMU_ARCH QEMU_ARCH_M68K
#elif defined(TARGET_LM32)
#define QEMU_ARCH QEMU_ARCH_LM32
#elif defined(TARGET_MICROBLAZE)
#define QEMU_ARCH QEMU_ARCH_MICROBLAZE
#elif defined(TARGET_MIPS)
#define QEMU_ARCH QEMU_ARCH_MIPS
#elif defined(TARGET_MOXIE)
#define QEMU_ARCH QEMU_ARCH_MOXIE
#elif defined(TARGET_OPENRISC)
#define QEMU_ARCH QEMU_ARCH_OPENRISC
#elif defined(TARGET_PPC)
#define QEMU_ARCH QEMU_ARCH_PPC
#elif defined(TARGET_S390X)
#define QEMU_ARCH QEMU_ARCH_S390X
#elif defined(TARGET_SH4)
#define QEMU_ARCH QEMU_ARCH_SH4
#elif defined(TARGET_SPARC)
#define QEMU_ARCH QEMU_ARCH_SPARC
#elif defined(TARGET_XTENSA)
#define QEMU_ARCH QEMU_ARCH_XTENSA
#elif defined(TARGET_UNICORE32)
#define QEMU_ARCH QEMU_ARCH_UNICORE32
#endif

const uint32_t arch_type = QEMU_ARCH;
static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */

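/*
 * Editor's sketch (not in the original file): page offsets are
 * TARGET_PAGE_SIZE-aligned, so the flags above fit in the low bits of the
 * 64-bit word that starts each record in the RAM stream, e.g.
 *
 *     qemu_put_be64(f, offset | cont | flag);          // save side
 *
 *     uint64_t addr = qemu_get_be64(f);                // load side
 *     int flags = addr & ~TARGET_PAGE_MASK;
 *     addr &= TARGET_PAGE_MASK;
 *
 * This mirrors what save_block_hdr() and the top of ram_load() below do.
 */
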
static struct defconfig_file {
    const char *filename;
    /* Indicates it is a user config file (disabled by -no-user-config) */
    bool userconfig;
} default_config_files[] = {
    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
    { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
    { NULL }, /* end of list */
};

int qemu_read_default_config_files(bool userconfig)
{
    int ret;
    struct defconfig_file *f;

    for (f = default_config_files; f->filename; f++) {
        if (!userconfig && f->userconfig) {
            continue;
        }
        ret = qemu_read_config_file(f->filename);
        if (ret < 0 && ret != -ENOENT) {
            return ret;
        }
    }

    return 0;
}

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_find_nonzero_offset(p, size) == size;
}

/* This struct contains the XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE */
    PageCache *cache;
} XBZRLE = {
    .encoded_buf = NULL,
    .current_buf = NULL,
    .cache = NULL,
};
/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

int64_t xbzrle_cache_resize(int64_t new_size)
{
    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    if (XBZRLE.cache != NULL) {
        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
            TARGET_PAGE_SIZE;
    }
    return pow2floor(new_size);
}

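/*
 * Worked example (editor's note, assuming 4 KiB target pages): resizing to
 * 5 MiB before the cache exists just reports pow2floor(5 MiB) = 4 MiB.
 * Once the cache is live, 5 MiB becomes a page count (5 MiB / 4 KiB = 1280
 * pages); cache_resize() decides how many page slots it actually keeps,
 * and the result is scaled back to bytes.
 */
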
/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                             int cont, int flag)
{
    size_t size;

    qemu_put_be64(f, offset | cont | flag);
    size = 8;

    if (!cont) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr,
                        strlen(block->idstr));
        size += 1 + strlen(block->idstr);
    }
    return size;
}

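/*
 * On-wire layout produced by save_block_hdr() (editor's sketch derived
 * from the code above):
 *
 *     be64  offset | cont | flag        flags live in the low bits
 *     u8    strlen(block->idstr)        only when cont == 0
 *     u8[]  block->idstr                only when cont == 0
 *
 * RAM_SAVE_FLAG_CONTINUE lets consecutive pages from the same block skip
 * re-sending the block id, which is why the returned size varies.
 */
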
#define ENCODING_FLAG_XBZRLE 0x1

static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, int cont, bool last_stage)
{
    int encoded_len = 0, bytes_sent = -1;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, current_data) == -1) {
                return -1;
            }
        }
        acct_info.xbzrle_cache_miss++;
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
        return -1;
    }

    /* update the cache copy, so that the next delta is computed against
       the page the destination now holds */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE-compressed page */
    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_sent += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_sent;

    return bytes_sent;
}

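/*
 * Resulting XBZRLE record on the wire (editor's sketch, matching the
 * writes above; not normative):
 *
 *     block header                      from save_block_hdr()
 *     u8    ENCODING_FLAG_XBZRLE
 *     be16  encoded_len
 *     u8[]  encoded_buf, encoded_len bytes
 *
 * load_xbzrle() below parses exactly this layout.
 */
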
/* This is the last block that we have visited searching for dirty pages */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
{
    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);

    unsigned long next;

    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(migration_bitmap, size, nr);
    }

    if (next < size) {
        clear_bit(next, migration_bitmap);
        migration_dirty_pages--;
    }
    return (next - base) << TARGET_PAGE_BITS;
}

static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;

    ret = test_and_set_bit(nr, migration_bitmap);

    if (!ret) {
        migration_dirty_pages++;
    }
    return ret;
}

static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* Is the start address aligned to the start of a bitmap word? */
    if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];

        for (k = page; k < page + nr; k++) {
            if (src[k]) {
                unsigned long new_dirty;
                new_dirty = ~migration_bitmap[k];
                migration_bitmap[k] |= src[k];
                new_dirty &= src[k];
                migration_dirty_pages += ctpopl(new_dirty);
                src[k] = 0;
            }
        }
    } else {
        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_get_dirty(start + addr,
                                              TARGET_PAGE_SIZE,
                                              DIRTY_MEMORY_MIGRATION)) {
                cpu_physical_memory_reset_dirty(start + addr,
                                                TARGET_PAGE_SIZE,
                                                DIRTY_MEMORY_MIGRATION);
                migration_bitmap_set_dirty(start + addr);
            }
        }
    }
}

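/*
 * Worked example (editor's note): suppose src[k] == 0b1010 and
 * migration_bitmap[k] == 0b0010.  Then new_dirty = ~0b0010 & 0b1010 =
 * 0b1000, so exactly one page is newly dirty; ctpopl() counts that single
 * bit, and the OR merges all dirty bits in one store covering
 * BITS_PER_LONG pages, which is why the word-aligned path is preferred.
 */
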
/* Needs iothread lock! */

static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    static int64_t start_time;
    static int64_t bytes_xfer_prev;
    static int64_t num_dirty_pages_period;
    int64_t end_time;
    int64_t bytes_xfer_now;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->mr->ram_addr, block->length);
    }
    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later.  For now:
               check whether the bytes dirtied in this period exceed half of
               the bytes transferred since the last time we were here.  If
               that happens more than N times in a row (for now N == 4),
               turn on the throttle-down logic. */
            bytes_xfer_now = ram_bytes_transferred();
            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                    (bytes_xfer_now - bytes_xfer_prev) / 2) &&
                (dirty_rate_high_cnt++ > 4)) {
                trace_migration_throttle();
                mig_throttle_on = true;
                dirty_rate_high_cnt = 0;
            }
            bytes_xfer_prev = bytes_xfer_now;
        } else {
            mig_throttle_on = false;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
}

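/*
 * Worked numbers (editor's note, assuming 4 KiB pages): if 131072 pages
 * were dirtied over a period of exactly 1000 ms, dirty_pages_rate is
 * 131072 pages/s and dirty_bytes_rate is 512 MiB/s.  Auto-converge arms
 * the throttle only after the "dirtied > transferred/2" test has held for
 * enough consecutive periods to push dirty_rate_high_cnt past 4.
 */
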
/*
 * ram_save_block: Writes a page of memory to the stream f
 *
 * Returns:  The number of bytes written.
 *           0 means no dirty pages
 */

static int ram_save_block(QEMUFile *f, bool last_stage)
{
    RAMBlock *block = last_seen_block;
    ram_addr_t offset = last_offset;
    bool complete_round = false;
    int bytes_sent = 0;
    MemoryRegion *mr;
    ram_addr_t current_addr;

    if (!block) {
        block = QTAILQ_FIRST(&ram_list.blocks);
    }

    while (true) {
        mr = block->mr;
        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
        if (complete_round && block == last_seen_block &&
            offset >= last_offset) {
            break;
        }
        if (offset >= block->length) {
            offset = 0;
            block = QTAILQ_NEXT(block, next);
            if (!block) {
                block = QTAILQ_FIRST(&ram_list.blocks);
                complete_round = true;
                ram_bulk_stage = false;
            }
        } else {
            int ret;
            uint8_t *p;
            int cont = (block == last_sent_block) ?
                RAM_SAVE_FLAG_CONTINUE : 0;

            p = memory_region_get_ram_ptr(mr) + offset;

            /* When in doubt, send the page as a normal page */
            bytes_sent = -1;
            ret = ram_control_save_page(f, block->offset,
                               offset, TARGET_PAGE_SIZE, &bytes_sent);

            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                if (ret != RAM_SAVE_CONTROL_DELAYED) {
                    if (bytes_sent > 0) {
                        acct_info.norm_pages++;
                    } else if (bytes_sent == 0) {
                        acct_info.dup_pages++;
                    }
                }
            } else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
                acct_info.dup_pages++;
                bytes_sent = save_block_hdr(f, block, offset, cont,
                                            RAM_SAVE_FLAG_COMPRESS);
                qemu_put_byte(f, 0);
                bytes_sent++;
            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
                current_addr = block->offset + offset;
                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                              offset, cont, last_stage);
                if (!last_stage) {
                    p = get_cached_data(XBZRLE.cache, current_addr);
                }
            }

            /* XBZRLE overflow or normal page */
            if (bytes_sent == -1) {
                bytes_sent = save_block_hdr(f, block, offset, cont,
                                            RAM_SAVE_FLAG_PAGE);
                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
                bytes_sent += TARGET_PAGE_SIZE;
                acct_info.norm_pages++;
            }

            /* if page is unmodified, continue to the next */
            if (bytes_sent > 0) {
                last_sent_block = block;
                break;
            }
        }
    }
    last_seen_block = block;
    last_offset = offset;

    return bytes_sent;
}

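/*
 * Editor's summary of the decision order above: a registered handler
 * (e.g. RDMA) gets first refusal via ram_control_save_page(); failing
 * that, an all-zero page goes out as a small RAM_SAVE_FLAG_COMPRESS
 * record; after the bulk stage XBZRLE may send a delta against the page
 * cache; and a full RAM_SAVE_FLAG_PAGE copy is the fallback.
 */
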
static uint64_t bytes_transferred;

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        total += block->length;
    }

    return total;
}

void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}

static void migration_end(void)
{
    if (migration_bitmap) {
        memory_global_dirty_log_stop();
        g_free(migration_bitmap);
        migration_bitmap = NULL;
    }

    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
}

static void ram_migration_cancel(void *opaque)
{
    migration_end();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMBlock *block;
    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    migration_bitmap = bitmap_new(ram_pages);
    bitmap_set(migration_bitmap, 0, ram_pages);
    migration_dirty_pages = ram_pages;
    mig_throttle_on = false;
    dirty_rate_high_cnt = 0;

    if (migrate_use_xbzrle()) {
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
            DPRINTF("Error creating cache\n");
            return -1;
        }

        /* We prefer not to abort if there is no memory */
        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
        if (!XBZRLE.encoded_buf) {
            DPRINTF("Error allocating encoded_buf\n");
            /* don't leak the cache on the error path */
            cache_fini(XBZRLE.cache);
            g_free(XBZRLE.cache);
            XBZRLE.cache = NULL;
            return -1;
        }

        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!XBZRLE.current_buf) {
            DPRINTF("Error allocating current_buf\n");
            g_free(XBZRLE.encoded_buf);
            XBZRLE.encoded_buf = NULL;
            /* don't leak the cache on the error path */
            cache_fini(XBZRLE.cache);
            g_free(XBZRLE.cache);
            XBZRLE.cache = NULL;
            return -1;
        }

        acct_clear();
    }

    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    bytes_transferred = 0;
    reset_ram_globals();

    memory_global_dirty_log_start();
    migration_bitmap_sync();
    qemu_mutex_unlock_iothread();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->length);
    }

    qemu_mutex_unlock_ramlist();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

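/*
 * Stream produced by ram_save_setup() (editor's sketch, matching the code
 * above):
 *
 *     be64  ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *     then, per RAMBlock:  u8 idstr_len, idstr bytes, be64 length
 *     be64  RAM_SAVE_FLAG_EOS
 *
 * ram_load() checks this block list against the destination's RAMBlocks
 * before accepting any pages.
 */
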
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int i;
    int64_t t0;
    int total_sent = 0;

    qemu_mutex_lock_ramlist();

    if (ram_list.version != last_version) {
        reset_ram_globals();
    }

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int bytes_sent;

        bytes_sent = ram_save_block(f, false);
        /* no more blocks to send */
        if (bytes_sent == 0) {
            break;
        }
        total_sent += bytes_sent;
        acct_info.iterations++;
        check_guest_throttling();
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_clock_get_ns() is a bit expensive, so we only check once
           every few iterations
        */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
        }
        i++;
    }

    qemu_mutex_unlock_ramlist();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    bytes_transferred += total_sent;

    /*
     * Do not count these 8 bytes into total_sent, so that we can
     * return 0 if no page had been dirtied.
     */
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    bytes_transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return total_sent;
}

static int ram_save_complete(QEMUFile *f, void *opaque)
{
    qemu_mutex_lock_ramlist();
    migration_bitmap_sync();

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int bytes_sent;

        bytes_sent = ram_save_block(f, true);
        /* no more blocks to send */
        if (bytes_sent == 0) {
            break;
        }
        bytes_transferred += bytes_sent;
    }

    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    migration_end();

    qemu_mutex_unlock_ramlist();
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        migration_bitmap_sync();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }
    return remaining_size;
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    int ret, rc = 0;
    unsigned int xh_len;
    int xh_flags;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
        return -1;
    }
    /* load data and decode */
    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);

    /* decode RLE */
    ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
                               TARGET_PAGE_SIZE);
    if (ret == -1) {
        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
        rc = -1;
    } else if (ret > TARGET_PAGE_SIZE) {
        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
                ret, TARGET_PAGE_SIZE);
        abort();
    }

    return rc;
}

static inline void *host_from_stream_offset(QEMUFile *f,
                                            ram_addr_t offset,
                                            int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            fprintf(stderr, "Ack, bad migration stream!\n");
            return NULL;
        }

        return memory_region_get_ram_ptr(block->mr) + offset;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (!strncmp(id, block->idstr, sizeof(id))) {
            return memory_region_get_ram_ptr(block->mr) + offset;
        }
    }

    fprintf(stderr, "Can't find block %s!\n", id);
    return NULL;
}

/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    ram_addr_t addr;
    int flags, ret = 0;
    int error;
    static uint64_t seq_iter;

    seq_iter++;

    if (version_id != 4) {
        return -EINVAL;
    }

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
            /* Synchronize RAM block list */
            char id[256];
            ram_addr_t length;
            ram_addr_t total_ram_bytes = addr;

            while (total_ram_bytes) {
                RAMBlock *block;
                uint8_t len;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                    if (!strncmp(id, block->idstr, sizeof(id))) {
                        if (block->length != length) {
                            fprintf(stderr,
                                    "Length mismatch: %s: " RAM_ADDR_FMT
                                    " in != " RAM_ADDR_FMT "\n", id, length,
                                    block->length);
                            ret = -EINVAL;
                            goto done;
                        }
                        break;
                    }
                }

                if (!block) {
                    fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                            "accept migration\n", id);
                    ret = -EINVAL;
                    goto done;
                }

                total_ram_bytes -= length;
            }
        }

        if (flags & RAM_SAVE_FLAG_COMPRESS) {
            void *host;
            uint8_t ch;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_PAGE) {
            void *host;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
            void *host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            if (load_xbzrle(f, addr, host) < 0) {
                ret = -EINVAL;
                goto done;
            }
        } else if (flags & RAM_SAVE_FLAG_HOOK) {
            ram_control_load_hook(f, flags);
        }
        error = qemu_file_get_error(f);
        if (error) {
            ret = error;
            goto done;
        }
    } while (!(flags & RAM_SAVE_FLAG_EOS));

done:
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}

SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cancel = ram_migration_cancel,
};

struct soundhw {
    const char *name;
    const char *descr;
    int enabled;
    int isa;
    union {
        int (*init_isa) (ISABus *bus);
        int (*init_pci) (PCIBus *bus);
    } init;
};

static struct soundhw soundhw[9];
static int soundhw_count;

void isa_register_soundhw(const char *name, const char *descr,
                          int (*init_isa)(ISABus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 1;
    soundhw[soundhw_count].init.init_isa = init_isa;
    soundhw_count++;
}

void pci_register_soundhw(const char *name, const char *descr,
                          int (*init_pci)(PCIBus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 0;
    soundhw[soundhw_count].init.init_pci = init_pci;
    soundhw_count++;
}

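/*
 * Usage sketch (editor's note; "fooaudio" and fooaudio_init are
 * hypothetical, not part of this file): a device model registers itself at
 * startup, e.g.
 *
 *     static int fooaudio_init(ISABus *bus) { ... return 0; }
 *     isa_register_soundhw("fooaudio", "Example ISA audio card",
 *                          fooaudio_init);
 *
 * after which "-soundhw fooaudio" (parsed by select_soundhw() below) marks
 * the entry enabled and audio_init() calls the init hook with the ISA bus.
 */
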
void select_soundhw(const char *optarg)
{
    struct soundhw *c;

    if (is_help_option(optarg)) {
    show_valid_cards:

        if (soundhw_count) {
            printf("Valid sound card names (comma separated):\n");
            for (c = soundhw; c->name; ++c) {
                printf("%-11s %s\n", c->name, c->descr);
            }
            printf("\n-soundhw all will enable all of the above\n");
        } else {
            printf("Machine has no user-selectable audio hardware "
                   "(it may or may not have always-present audio hardware).\n");
        }
        exit(!is_help_option(optarg));
    } else {
        size_t l;
        const char *p;
        char *e;
        int bad_card = 0;

        if (!strcmp(optarg, "all")) {
            for (c = soundhw; c->name; ++c) {
                c->enabled = 1;
            }
            return;
        }

        p = optarg;
        while (*p) {
            e = strchr(p, ',');
            l = !e ? strlen(p) : (size_t) (e - p);

            for (c = soundhw; c->name; ++c) {
                if (!strncmp(c->name, p, l) && !c->name[l]) {
                    c->enabled = 1;
                    break;
                }
            }

            if (!c->name) {
                if (l > 80) {
                    fprintf(stderr,
                            "Unknown sound card name (too big to show)\n");
                } else {
                    fprintf(stderr, "Unknown sound card name `%.*s'\n",
                            (int) l, p);
                }
                bad_card = 1;
            }
            p += l + (e != NULL);
        }

        if (bad_card) {
            goto show_valid_cards;
        }
    }
}

void audio_init(void)
{
    struct soundhw *c;
    ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
    PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);

    for (c = soundhw; c->name; ++c) {
        if (c->enabled) {
            if (c->isa) {
                if (!isa_bus) {
                    fprintf(stderr, "ISA bus not available for %s\n", c->name);
                    exit(1);
                }
                c->init.init_isa(isa_bus);
            } else {
                if (!pci_bus) {
                    fprintf(stderr, "PCI bus not available for %s\n", c->name);
                    exit(1);
                }
                c->init.init_pci(pci_bus);
            }
        }
    }
}

int qemu_uuid_parse(const char *str, uint8_t *uuid)
{
    int ret;

    if (strlen(str) != 36) {
        return -1;
    }

    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
                 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
                 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14],
                 &uuid[15]);

    if (ret != 16) {
        return -1;
    }
    return 0;
}

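/*
 * Usage sketch (editor's note; the UUID string is an arbitrary example):
 * a canonical 36-character UUID string parses into 16 raw bytes, e.g.
 *
 *     uint8_t uuid[16];
 *     if (qemu_uuid_parse("550e8400-e29b-41d4-a716-446655440000", uuid) < 0) {
 *         // reject malformed UUID
 *     }
 *
 * Anything that is not exactly 36 characters, or that sscanf() cannot
 * match against UUID_FMT as 16 hex bytes, is rejected with -1.
 */
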
void do_acpitable_option(const QemuOpts *opts)
{
#ifdef TARGET_I386
    Error *err = NULL;

    acpi_table_add(opts, &err);
    if (err) {
        error_report("Wrong acpi table provided: %s",
                     error_get_pretty(err));
        error_free(err);
        exit(1);
    }
#endif
}

void do_smbios_option(QemuOpts *opts)
{
#ifdef TARGET_I386
    smbios_entry_add(opts);
#endif
}

void cpudef_init(void)
{
#if defined(cpudef_setup)
    cpudef_setup(); /* parse cpu definitions in target config file */
#endif
}

int tcg_available(void)
{
    return 1;
}

int kvm_available(void)
{
#ifdef CONFIG_KVM
    return 1;
#else
    return 0;
#endif
}

int xen_available(void)
{
#ifdef CONFIG_XEN
    return 1;
#else
    return 0;
#endif
}

TargetInfo *qmp_query_target(Error **errp)
{
    TargetInfo *info = g_malloc0(sizeof(*info));

    info->arch = g_strdup(TARGET_NAME);

    return info;
}

/* Stub function that gets run on the vcpu when it's brought out of the
   VM to run inside qemu via async_run_on_cpu() */
static void mig_sleep_cpu(void *opq)
{
    qemu_mutex_unlock_iothread();
    g_usleep(30*1000);
    qemu_mutex_lock_iothread();
}

/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
   much time in the VM.  The migration thread will try to catch up.
   The workload will experience a performance drop.
*/
static void mig_throttle_guest_down(void)
{
    CPUState *cpu;

    qemu_mutex_lock_iothread();
    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
    }
    qemu_mutex_unlock_iothread();
}

static void check_guest_throttling(void)
{
    static int64_t t0;
    int64_t        t1;

    if (!mig_throttle_on) {
        return;
    }

    if (!t0) {
        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        return;
    }

    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    /* If it has been more than 40 ms since the last time the guest
     * was throttled then do it again.
     */
    if (40 < (t1-t0)/1000000) {
        mig_throttle_guest_down();
        t0 = t1;
    }
}
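
/*
 * Worked example (editor's note): with mig_throttle_on set, the first call
 * only records t0.  Each later call samples QEMU_CLOCK_REALTIME in
 * nanoseconds, so (t1 - t0) / 1000000 is the elapsed time in milliseconds;
 * once more than 40 ms have passed, every vCPU is queued a 30 ms sleep via
 * mig_sleep_cpu(), capping guest execution at very roughly 40/70 of
 * wall-clock time while the migration thread catches up.
 */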