/*
 * QEMU dump
 *
 * Copyright Fujitsu, Corp. 2011, 2012
 *
 * Authors:
 *     Wen Congyang <wency@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu-common.h"
#include "elf.h"
#include "cpu.h"
#include "exec/cpu-all.h"
#include "exec/hwaddr.h"
#include "monitor/monitor.h"
#include "sysemu/kvm.h"
#include "sysemu/dump.h"
#include "sysemu/sysemu.h"
#include "sysemu/memory_mapping.h"
#include "sysemu/cpus.h"
#include "qapi/error.h"
#include "qmp-commands.h"

#include <zlib.h>
#ifdef CONFIG_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef CONFIG_SNAPPY
#include <snappy-c.h>
#endif
#ifndef ELF_MACHINE_UNAME
#define ELF_MACHINE_UNAME "Unknown"
#endif

static uint16_t cpu_convert_to_target16(uint16_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le16(val);
    } else {
        val = cpu_to_be16(val);
    }

    return val;
}

static uint32_t cpu_convert_to_target32(uint32_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le32(val);
    } else {
        val = cpu_to_be32(val);
    }

    return val;
}

static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
{
    if (endian == ELFDATA2LSB) {
        val = cpu_to_le64(val);
    } else {
        val = cpu_to_be64(val);
    }

    return val;
}
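
/*
 * Illustrative note: these helpers pick the byte order of the *guest* ELF
 * file, not of the host.  For example, cpu_convert_to_target16(ET_CORE,
 * ELFDATA2MSB) stores ET_CORE (4) as the bytes 0x00 0x04, so the vmcore
 * parses correctly regardless of the endianness of the host that wrote it.
 */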

typedef struct DumpState {
    GuestPhysBlockList guest_phys_blocks;
    ArchDumpInfo dump_info;
    MemoryMappingList list;
    uint16_t phdr_num;
    uint32_t sh_info;
    bool have_section;
    bool resume;
    ssize_t note_size;
    hwaddr memory_offset;
    int fd;

    GuestPhysBlock *next_block;
    ram_addr_t start;
    bool has_filter;
    int64_t begin;
    int64_t length;
    Error **errp;

    uint8_t *note_buf;          /* buffer for notes */
    size_t note_buf_offset;     /* the writing place in note_buf */
    uint32_t nr_cpus;           /* number of guest's cpus */
    size_t page_size;           /* guest's page size */
    uint32_t page_shift;        /* guest's page shift */
    uint64_t max_mapnr;         /* the biggest guest page frame number */
    size_t len_dump_bitmap;     /* the size of the place used to store
                                   dump_bitmap in vmcore */
    off_t offset_dump_bitmap;   /* offset of dump_bitmap part in vmcore */
    off_t offset_page;          /* offset of page part in vmcore */
    size_t num_dumpable;        /* number of pages that can be dumped */
    uint32_t flag_compress;     /* indicate the compression format */
} DumpState;

static int dump_cleanup(DumpState *s)
{
    int ret = 0;

    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    if (s->fd != -1) {
        close(s->fd);
    }
    if (s->resume) {
        vm_start();
    }

    return ret;
}

static void dump_error(DumpState *s, const char *reason)
{
    dump_cleanup(s);
}

static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;
    size_t written_size;

    written_size = qemu_write_full(s->fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}

static int write_elf64_header(DumpState *s)
{
    Elf64_Ehdr elf_header;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
    elf_header.e_ident[EI_DATA] = endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
                                                   endian);
    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
    elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
                                                     endian);
    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
    if (s->have_section) {
        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
                                                         endian);
        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write elf header.\n");
        return -1;
    }

    return 0;
}

static int write_elf32_header(DumpState *s)
{
    Elf32_Ehdr elf_header;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
    elf_header.e_ident[EI_DATA] = endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
    elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
                                                   endian);
    elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
    elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
    elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
    elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
                                                     endian);
    elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
    if (s->have_section) {
        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
        elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
                                                         endian);
        elf_header.e_shnum = cpu_convert_to_target16(1, endian);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write elf header.\n");
        return -1;
    }

    return 0;
}
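
/*
 * Illustrative layout note: e_phoff points directly past the ELF header, so
 * the program header table immediately follows it.  With the standard
 * structure sizes (Elf64_Ehdr is 64 bytes and Elf64_Phdr is 56 bytes), a
 * vmcore with N program headers puts the first one at offset 64 and ends the
 * table at offset 64 + 56 * N; the ELFCLASS32 variants use 52- and 32-byte
 * structures respectively.
 */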

static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                            int phdr_index, hwaddr offset,
                            hwaddr filesz)
{
    Elf64_Phdr phdr;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target64(offset, endian);
    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
    phdr.p_filesz = cpu_convert_to_target64(filesz, endian);
    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                            int phdr_index, hwaddr offset,
                            hwaddr filesz)
{
    Elf32_Phdr phdr;
    int ret;
    int endian = s->dump_info.d_endian;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target32(offset, endian);
    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
    phdr.p_filesz = cpu_convert_to_target32(filesz, endian);
    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

static int write_elf64_note(DumpState *s)
{
    Elf64_Phdr phdr;
    int endian = s->dump_info.d_endian;
    hwaddr begin = s->memory_offset - s->note_size;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
    phdr.p_offset = cpu_convert_to_target64(begin, endian);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_convert_to_target64(s->note_size, endian);
    phdr.p_memsz = cpu_convert_to_target64(s->note_size, endian);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

static inline int cpu_index(CPUState *cpu)
{
    return cpu->cpu_index + 1;
}

static int write_elf64_notes(WriteCoreDumpFunction f, DumpState *s)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(f, cpu, id, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write elf notes.\n");
            return -1;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf64_qemunote(f, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status.\n");
            return -1;
        }
    }

    return 0;
}

static int write_elf32_note(DumpState *s)
{
    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
    int endian = s->dump_info.d_endian;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
    phdr.p_offset = cpu_convert_to_target32(begin, endian);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_convert_to_target32(s->note_size, endian);
    phdr.p_memsz = cpu_convert_to_target32(s->note_size, endian);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
        return -1;
    }

    return 0;
}

static int write_elf32_notes(WriteCoreDumpFunction f, DumpState *s)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(f, cpu, id, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write elf notes.\n");
            return -1;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf32_qemunote(f, cpu, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to write CPU status.\n");
            return -1;
        }
    }

    return 0;
}

static int write_elf_section(DumpState *s, int type)
{
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
    int endian = s->dump_info.d_endian;
    int shdr_size;
    void *shdr;
    int ret;

    if (type == 0) {
        shdr_size = sizeof(Elf32_Shdr);
        memset(&shdr32, 0, shdr_size);
        shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
        shdr = &shdr32;
    } else {
        shdr_size = sizeof(Elf64_Shdr);
        memset(&shdr64, 0, shdr_size);
        shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
        shdr = &shdr64;
    }

    ret = fd_write_vmcore(shdr, shdr_size, s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write section header table.\n");
        return -1;
    }

    return 0;
}

static int write_data(DumpState *s, void *buf, int length)
{
    int ret;

    ret = fd_write_vmcore(buf, length, s);
    if (ret < 0) {
        dump_error(s, "dump: failed to save memory.\n");
        return -1;
    }

    return 0;
}

/* write the memory to vmcore. 1 page per I/O. */
static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
                        int64_t size)
{
    int64_t i;
    int ret;

    for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
                         TARGET_PAGE_SIZE);
        if (ret < 0) {
            return ret;
        }
    }

    if ((size % TARGET_PAGE_SIZE) != 0) {
        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
                         size % TARGET_PAGE_SIZE);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/* get the memory's offset and size in the vmcore */
static void get_offset_range(hwaddr phys_addr,
                             ram_addr_t mapping_length,
                             DumpState *s,
                             hwaddr *p_offset,
                             hwaddr *p_filesz)
{
    GuestPhysBlock *block;
    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

    /* When the memory is not stored into vmcore, offset will be -1 */
    *p_offset = -1;
    *p_filesz = 0;

    if (s->has_filter) {
        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
            return;
        }
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin <= block->target_start) {
                start = block->target_start;
            } else {
                start = s->begin;
            }

            size_in_block = block->target_end - start;
            if (s->begin + s->length < block->target_end) {
                size_in_block -= block->target_end - (s->begin + s->length);
            }
        } else {
            start = block->target_start;
            size_in_block = block->target_end - block->target_start;
        }

        if (phys_addr >= start && phys_addr < start + size_in_block) {
            *p_offset = phys_addr - start + offset;

            /* The offset range mapped from the vmcore file must not spill over
             * the GuestPhysBlock, clamp it. The rest of the mapping will be
             * zero-filled in memory at load time; see
             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
             */
            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
                        mapping_length :
                        size_in_block - (phys_addr - start);
            return;
        }

        offset += size_in_block;
    }
}
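
/*
 * Worked example (illustrative): suppose memory_offset is 0x2000 and the
 * guest has two blocks, [0, 0x100000) and [0x200000, 0x300000).  Looking up
 * phys_addr 0x200000 skips the first block, advancing offset by its 0x100000
 * bytes, and returns *p_offset = 0x102000.
 */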

static int write_elf_loads(DumpState *s)
{
    hwaddr offset, filesz;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    int ret;
    uint32_t max_index;

    if (s->have_section) {
        max_index = s->sh_info;
    } else {
        max_index = s->phdr_num;
    }

    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
        get_offset_range(memory_mapping->phys_addr,
                         memory_mapping->length,
                         s, &offset, &filesz);
        if (s->dump_info.d_class == ELFCLASS64) {
            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset,
                                   filesz);
        } else {
            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset,
                                   filesz);
        }

        if (ret < 0) {
            return -1;
        }

        if (phdr_index >= max_index) {
            break;
        }
    }

    return 0;
}

/* write elf header, PT_NOTE and elf note to vmcore. */
static int dump_begin(DumpState *s)
{
    int ret;

    /*
     * the vmcore's format is:
     *   --------------
     *   |  elf header |
     *   --------------
     *   |  PT_NOTE    |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  ......     |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  sec_hdr    |
     *   --------------
     *   |  elf note   |
     *   --------------
     *   |  memory     |
     *   --------------
     *
     * we only know where the memory is saved after we write the elf note into
     * the vmcore.
     */

    /* write elf header to vmcore */
    if (s->dump_info.d_class == ELFCLASS64) {
        ret = write_elf64_header(s);
    } else {
        ret = write_elf32_header(s);
    }
    if (ret < 0) {
        return -1;
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        /* write PT_NOTE to vmcore */
        if (write_elf64_note(s) < 0) {
            return -1;
        }

        /* write all PT_LOAD to vmcore */
        if (write_elf_loads(s) < 0) {
            return -1;
        }

        /* write section to vmcore */
        if (s->have_section) {
            if (write_elf_section(s, 1) < 0) {
                return -1;
            }
        }

        /* write notes to vmcore */
        if (write_elf64_notes(fd_write_vmcore, s) < 0) {
            return -1;
        }

    } else {
        /* write PT_NOTE to vmcore */
        if (write_elf32_note(s) < 0) {
            return -1;
        }

        /* write all PT_LOAD to vmcore */
        if (write_elf_loads(s) < 0) {
            return -1;
        }

        /* write section to vmcore */
        if (s->have_section) {
            if (write_elf_section(s, 0) < 0) {
                return -1;
            }
        }

        /* write notes to vmcore */
        if (write_elf32_notes(fd_write_vmcore, s) < 0) {
            return -1;
        }
    }

    return 0;
}
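
/*
 * Worked example (illustrative): for an ELFCLASS64 vmcore without the extra
 * section header, the ELF header occupies bytes 0..63, the PT_NOTE program
 * header starts at offset 64, and the remaining phdr_num - 1 PT_LOAD headers
 * follow at 56-byte strides.  The note data then begins at 64 + 56 * phdr_num,
 * and guest memory starts at s->memory_offset = 64 + 56 * phdr_num + note_size,
 * matching the calculation in dump_init().
 */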

/* clean up when the dump has completed */
static int dump_completed(DumpState *s)
{
    dump_cleanup(s);
    return 0;
}

static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
    while (1) {
        block = QTAILQ_NEXT(block, next);
        if (!block) {
            /* no more block */
            return 1;
        }

        s->start = 0;
        s->next_block = block;
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin > block->target_start) {
                s->start = s->begin - block->target_start;
            }
        }

        return 0;
    }
}

/* write all memory to vmcore */
static int dump_iterate(DumpState *s)
{
    GuestPhysBlock *block;
    int64_t size;
    int ret;

    while (1) {
        block = s->next_block;

        size = block->target_end - block->target_start;
        if (s->has_filter) {
            size -= s->start;
            if (s->begin + s->length < block->target_end) {
                size -= block->target_end - (s->begin + s->length);
            }
        }
        ret = write_memory(s, block, s->start, size);
        if (ret == -1) {
            return ret;
        }

        ret = get_next_block(s, block);
        if (ret == 1) {
            dump_completed(s);
            return 0;
        }
    }
}

static int create_vmcore(DumpState *s)
{
    int ret;

    ret = dump_begin(s);
    if (ret < 0) {
        return -1;
    }

    ret = dump_iterate(s);
    if (ret < 0) {
        return -1;
    }

    return 0;
}
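
/*
 * Note on the flat format below: the file starts with a MakedumpfileHeader
 * padded to MAX_SIZE_MDF_HEADER, and every subsequent write is framed by a
 * MakedumpfileDataHeader giving the (offset, size) the data would have in a
 * rebuilt file; a trailing header whose fields are END_FLAG_FLAT_HEADER marks
 * the end of the stream.  This appears to mirror makedumpfile's flattened
 * format, which "makedumpfile -R" can rearrange into a normal dump file.
 */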

static int write_start_flat_header(int fd)
{
    MakedumpfileHeader mh;
    uint8_t *buf;
    size_t written_size;
    int ret = 0;

    memset(&mh, 0, sizeof(mh));
    strncpy(mh.signature, MAKEDUMPFILE_SIGNATURE,
            strlen(MAKEDUMPFILE_SIGNATURE));

    mh.type = cpu_to_be64(TYPE_FLAT_HEADER);
    mh.version = cpu_to_be64(VERSION_FLAT_HEADER);

    buf = g_malloc0(MAX_SIZE_MDF_HEADER);
    memcpy(buf, &mh, sizeof(mh));

    written_size = qemu_write_full(fd, buf, MAX_SIZE_MDF_HEADER);
    if (written_size != MAX_SIZE_MDF_HEADER) {
        ret = -1;
    }

    g_free(buf);
    return ret;
}

static int write_end_flat_header(int fd)
{
    MakedumpfileDataHeader mdh;
    size_t written_size;

    mdh.offset = END_FLAG_FLAT_HEADER;
    mdh.buf_size = END_FLAG_FLAT_HEADER;

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    return 0;
}

static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
{
    size_t written_size;
    MakedumpfileDataHeader mdh;

    mdh.offset = cpu_to_be64(offset);
    mdh.buf_size = cpu_to_be64(size);

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    written_size = qemu_write_full(fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}

static int buf_write_note(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;

    /* not enough room left in note_buf */
    if (s->note_buf_offset + size > s->note_size) {
        return -1;
    }

    memcpy(s->note_buf + s->note_buf_offset, buf, size);

    s->note_buf_offset += size;

    return 0;
}

/* write common header, sub header and elf note to vmcore */
static int create_header32(DumpState *s)
{
    int ret = 0;
    DiskDumpHeader32 *dh = NULL;
    KdumpSubHeader32 *kh = NULL;
    size_t size;
    int endian = s->dump_info.d_endian;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;

    /* write the common header; the version of the kdump-compressed format is 6 */
    size = sizeof(DiskDumpHeader32);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
    dh->header_version = cpu_convert_to_target32(6, endian);
    block_size = s->page_size;
    dh->block_size = cpu_convert_to_target32(block_size, endian);
    sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_convert_to_target32(sub_hdr_size, endian);
    /* dh->max_mapnr may be truncated; the full 64-bit value is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_convert_to_target32(MIN(s->max_mapnr, UINT_MAX),
                                            endian);
    dh->nr_cpus = cpu_convert_to_target32(s->nr_cpus, endian);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_convert_to_target32(bitmap_blocks, endian);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_convert_to_target32(status, endian);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        dump_error(s, "dump: failed to write disk dump header.\n");
        ret = -1;
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader32);
    kh = g_malloc0(size);

    /* 64-bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_convert_to_target64(s->max_mapnr, endian);
    kh->phys_base = cpu_convert_to_target32(PHYS_BASE, endian);
    kh->dump_level = cpu_convert_to_target32(DUMP_LEVEL, endian);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    kh->offset_note = cpu_convert_to_target64(offset_note, endian);
    kh->note_size = cpu_convert_to_target32(s->note_size, endian);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        dump_error(s, "dump: failed to write kdump sub header.\n");
        ret = -1;
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    if (write_elf32_notes(buf_write_note, s) < 0) {
        ret = -1;
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        dump_error(s, "dump: failed to write notes");
        ret = -1;
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);

    return ret;
}

/* write common header, sub header and elf note to vmcore */
static int create_header64(DumpState *s)
{
    int ret = 0;
    DiskDumpHeader64 *dh = NULL;
    KdumpSubHeader64 *kh = NULL;
    size_t size;
    int endian = s->dump_info.d_endian;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;

    /* write the common header; the version of the kdump-compressed format is 6 */
    size = sizeof(DiskDumpHeader64);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
    dh->header_version = cpu_convert_to_target32(6, endian);
    block_size = s->page_size;
    dh->block_size = cpu_convert_to_target32(block_size, endian);
    sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_convert_to_target32(sub_hdr_size, endian);
    /* dh->max_mapnr may be truncated; the full 64-bit value is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_convert_to_target32(MIN(s->max_mapnr, UINT_MAX),
                                            endian);
    dh->nr_cpus = cpu_convert_to_target32(s->nr_cpus, endian);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_convert_to_target32(bitmap_blocks, endian);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_convert_to_target32(status, endian);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        dump_error(s, "dump: failed to write disk dump header.\n");
        ret = -1;
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader64);
    kh = g_malloc0(size);

    /* 64-bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_convert_to_target64(s->max_mapnr, endian);
    kh->phys_base = cpu_convert_to_target64(PHYS_BASE, endian);
    kh->dump_level = cpu_convert_to_target32(DUMP_LEVEL, endian);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    kh->offset_note = cpu_convert_to_target64(offset_note, endian);
    kh->note_size = cpu_convert_to_target64(s->note_size, endian);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        dump_error(s, "dump: failed to write kdump sub header.\n");
        ret = -1;
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    if (write_elf64_notes(buf_write_note, s) < 0) {
        ret = -1;
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        dump_error(s, "dump: failed to write notes");
        ret = -1;
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);

    return ret;
}

static int write_dump_header(DumpState *s)
{
    if (s->dump_info.d_machine == EM_386) {
        return create_header32(s);
    } else {
        return create_header64(s);
    }
}

/*
 * Set dump_bitmap sequentially.  Bits before last_pfn are not allowed to be
 * rewritten, so to set the very first bit pass 0 for both last_pfn and pfn.
 * set_dump_bitmap always leaves the most recently set bit un-synced; setting
 * (last bit + sizeof(buf) * 8) to 0 flushes the content of buf into the
 * vmcore, i.e. it synchronizes the un-synced bits.
 */
static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
                           uint8_t *buf, DumpState *s)
{
    off_t old_offset, new_offset;
    off_t offset_bitmap1, offset_bitmap2;
    uint32_t byte, bit;

    /* a previous position must not be set again */
    assert(last_pfn <= pfn);

    /*
     * If the bit to be set is not cached in buf, first flush the data in buf
     * to the vmcore.  Making new_offset bigger than old_offset also syncs
     * the remaining data into the vmcore.
     */
    old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
    new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);

    while (old_offset < new_offset) {
        /* calculate the offset and write dump_bitmap */
        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
        if (write_buffer(s->fd, offset_bitmap1, buf,
                         BUFSIZE_BITMAP) < 0) {
            return -1;
        }

        /* dump level 1 is chosen, so the 1st and 2nd bitmaps are the same */
        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
                         old_offset;
        if (write_buffer(s->fd, offset_bitmap2, buf,
                         BUFSIZE_BITMAP) < 0) {
            return -1;
        }

        memset(buf, 0, BUFSIZE_BITMAP);
        old_offset += BUFSIZE_BITMAP;
    }

    /* get the exact place of the bit in the buf, and set it */
    byte = (pfn % PFN_BUFBITMAP) / CHAR_BIT;
    bit = (pfn % PFN_BUFBITMAP) % CHAR_BIT;
    if (value) {
        buf[byte] |= 1u << bit;
    } else {
        buf[byte] &= ~(1u << bit);
    }

    return 0;
}
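
/*
 * Worked example (illustrative): buf covers PFN_BUFBITMAP page frames per
 * flush window.  Setting pfn 0, then pfn 1, ... only updates bits in buf;
 * once a pfn in the next PFN_BUFBITMAP-sized window is passed, the loop
 * above writes the full buffer to both bitmaps in the vmcore and clears it.
 * This is why write_dump_bitmap() finishes with pfn = last_pfn +
 * PFN_BUFBITMAP and value = false: it forces a final flush without setting
 * any new bit.
 */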

/*
 * Examine every page and return the page frame number and the address of the
 * page.  bufptr can be NULL.  Note: the blocks here are supposed to reflect
 * guest-phys blocks, so block->target_start and block->target_end should be
 * integral multiples of the target page size.
 */
static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
                          uint8_t **bufptr, DumpState *s)
{
    GuestPhysBlock *block = *blockptr;
    hwaddr addr;
    uint8_t *buf;

    /* block == NULL means the start of the iteration */
    if (!block) {
        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        *blockptr = block;
        assert(block->target_start % s->page_size == 0);
        assert(block->target_end % s->page_size == 0);
        *pfnptr = paddr_to_pfn(block->target_start, s->page_shift);
        if (bufptr) {
            *bufptr = block->host_addr;
        }
        return true;
    }

    *pfnptr = *pfnptr + 1;
    addr = pfn_to_paddr(*pfnptr, s->page_shift);

    if ((addr >= block->target_start) &&
        (addr + s->page_size <= block->target_end)) {
        buf = block->host_addr + (addr - block->target_start);
    } else {
        /* the next page is in the next block */
        block = QTAILQ_NEXT(block, next);
        *blockptr = block;
        if (!block) {
            return false;
        }
        assert(block->target_start % s->page_size == 0);
        assert(block->target_end % s->page_size == 0);
        *pfnptr = paddr_to_pfn(block->target_start, s->page_shift);
        buf = block->host_addr;
    }

    if (bufptr) {
        *bufptr = buf;
    }

    return true;
}

static int write_dump_bitmap(DumpState *s)
{
    int ret = 0;
    uint64_t last_pfn, pfn;
    void *dump_bitmap_buf;
    size_t num_dumpable;
    GuestPhysBlock *block_iter = NULL;

    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
    dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);

    num_dumpable = 0;
    last_pfn = 0;

    /*
     * Examine memory page by page, and set the bit in dump_bitmap
     * corresponding to each existing page.
     */
    while (get_next_page(&block_iter, &pfn, NULL, s)) {
        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to set dump_bitmap.\n");
            ret = -1;
            goto out;
        }

        last_pfn = pfn;
        num_dumpable++;
    }

    /*
     * set_dump_bitmap always leaves the most recently set bit un-synced.
     * Here we set bit last_pfn + PFN_BUFBITMAP to 0, so the set-but-unsynced
     * bits are synchronized into the vmcore.
     */
    if (num_dumpable > 0) {
        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, false,
                              dump_bitmap_buf, s);
        if (ret < 0) {
            dump_error(s, "dump: failed to sync dump_bitmap.\n");
            ret = -1;
            goto out;
        }
    }

    /* number of dumpable pages that will be dumped later */
    s->num_dumpable = num_dumpable;

out:
    g_free(dump_bitmap_buf);

    return ret;
}

static void prepare_data_cache(DataCache *data_cache, DumpState *s,
                               off_t offset)
{
    data_cache->fd = s->fd;
    data_cache->data_size = 0;
    data_cache->buf_size = BUFSIZE_DATA_CACHE;
    data_cache->buf = g_malloc0(BUFSIZE_DATA_CACHE);
    data_cache->offset = offset;
}
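
/*
 * Design note: page descriptors and page data are tiny, frequent writes, so
 * they are batched through a DataCache and only hit the file (via the
 * flat-format framing of write_buffer) in BUFSIZE_DATA_CACHE-sized chunks,
 * or when a final flag_sync flush is requested.
 */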

static int write_cache(DataCache *dc, const void *buf, size_t size,
                       bool flag_sync)
{
    /*
     * dc->buf_size must not be less than size, otherwise dc would never have
     * enough room
     */
    assert(size <= dc->buf_size);

    /*
     * if flag_sync is set, synchronize the data in dc->buf into the vmcore.
     * otherwise check whether there is enough space to cache the data in
     * buf; if not, write the data in dc->buf to dc->fd and reset dc->buf
     */
    if ((!flag_sync && dc->data_size + size > dc->buf_size) ||
        (flag_sync && dc->data_size > 0)) {
        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
            return -1;
        }

        dc->offset += dc->data_size;
        dc->data_size = 0;
    }

    if (!flag_sync) {
        memcpy(dc->buf + dc->data_size, buf, size);
        dc->data_size += size;
    }

    return 0;
}

static void free_data_cache(DataCache *data_cache)
{
    g_free(data_cache->buf);
}

static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
{
    size_t len_buf_out_zlib, len_buf_out_lzo, len_buf_out_snappy;
    size_t len_buf_out;

    /* init buf_out */
    len_buf_out_zlib = len_buf_out_lzo = len_buf_out_snappy = 0;

    /* buf size for zlib */
    len_buf_out_zlib = compressBound(page_size);

    /* buf size for lzo */
#ifdef CONFIG_LZO
    if (flag_compress & DUMP_DH_COMPRESSED_LZO) {
        if (lzo_init() != LZO_E_OK) {
            /* return 0 to indicate lzo is unavailable */
            return 0;
        }
    }

    /*
     * LZO will expand incompressible data by a little amount.  Please check
     * the following URL to see the expansion calculation:
     * http://www.oberhumer.com/opensource/lzo/lzofaq.php
     */
    len_buf_out_lzo = page_size + page_size / 16 + 64 + 3;
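    /*
     * Worked example (illustrative): for a 4 KiB page, the bound above is
     * 4096 + 4096 / 16 + 64 + 3 = 4419 bytes of worst-case LZO output.
     */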
#endif

#ifdef CONFIG_SNAPPY
    /* buf size for snappy */
    len_buf_out_snappy = snappy_max_compressed_length(page_size);
#endif

    /* pick the biggest, so the buffer can hold any kind of compressed page */
    len_buf_out = MAX(len_buf_out_zlib,
                      MAX(len_buf_out_lzo, len_buf_out_snappy));

    return len_buf_out;
}

/*
 * check if the page is all 0
 */
static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
{
    return buffer_is_zero(buf, page_size);
}
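
/*
 * Note: write_dump_pages() below deduplicates zero pages.  A single zeroed
 * page is written once at the start of the page section, and every zero
 * page in the guest shares the one descriptor (pd_zero) that points at it,
 * so a mostly-empty guest costs little more than its page descriptors.
 */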

static int write_dump_pages(DumpState *s)
{
    int ret = 0;
    DataCache page_desc, page_data;
    size_t len_buf_out, size_out;
#ifdef CONFIG_LZO
    lzo_bytep wrkmem = NULL;
#endif
    uint8_t *buf_out = NULL;
    off_t offset_desc, offset_data;
    PageDescriptor pd, pd_zero;
    uint8_t *buf;
    int endian = s->dump_info.d_endian;
    GuestPhysBlock *block_iter = NULL;
    uint64_t pfn_iter;

    /* get offset of page_desc and page_data in dump file */
    offset_desc = s->offset_page;
    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;

    prepare_data_cache(&page_desc, s, offset_desc);
    prepare_data_cache(&page_data, s, offset_data);

    /* prepare buffer to store compressed data */
    len_buf_out = get_len_buf_out(s->page_size, s->flag_compress);
    if (len_buf_out == 0) {
        ret = -1;
        dump_error(s, "dump: failed to get length of output buffer.\n");
        goto out;
    }

#ifdef CONFIG_LZO
    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
#endif

    buf_out = g_malloc(len_buf_out);

    /*
     * init the zero page's page_desc and page_data, because every zero page
     * uses the same page_data
     */
    pd_zero.size = cpu_convert_to_target32(s->page_size, endian);
    pd_zero.flags = cpu_convert_to_target32(0, endian);
    pd_zero.offset = cpu_convert_to_target64(offset_data, endian);
    pd_zero.page_flags = cpu_convert_to_target64(0, endian);
    buf = g_malloc0(s->page_size);
    ret = write_cache(&page_data, buf, s->page_size, false);
    g_free(buf);
    if (ret < 0) {
        dump_error(s, "dump: failed to write page data (zero page).\n");
        goto out;
    }

    offset_data += s->page_size;

    /*
     * Dump memory to the vmcore page by page.  All zero pages reside in the
     * first page of the page section.
     */
    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
        /* check zero page */
        if (is_zero_page(buf, s->page_size)) {
            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
                              false);
            if (ret < 0) {
                dump_error(s, "dump: failed to write page desc.\n");
                goto out;
            }
        } else {
            /*
             * This is not a zero page, so:
             * 1. compress the page
             * 2. write the compressed page into the cache of page_data
             * 3. get the page desc of the compressed page and write it into
             *    the cache of page_desc
             *
             * Only one compression format is used here, since
             * s->flag_compress is set.  If compression fails, we fall back
             * to storing the page uncompressed.
             */
            size_out = len_buf_out;
            if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
                    (compress2(buf_out, (uLongf *)&size_out, buf, s->page_size,
                    Z_BEST_SPEED) == Z_OK) && (size_out < s->page_size)) {
                pd.flags = cpu_convert_to_target32(DUMP_DH_COMPRESSED_ZLIB,
                                                   endian);
                pd.size  = cpu_convert_to_target32(size_out, endian);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data.\n");
                    goto out;
                }
#ifdef CONFIG_LZO
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
                    (lzo1x_1_compress(buf, s->page_size, buf_out,
                    (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
                    (size_out < s->page_size)) {
                pd.flags = cpu_convert_to_target32(DUMP_DH_COMPRESSED_LZO,
                                                   endian);
                pd.size  = cpu_convert_to_target32(size_out, endian);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data.\n");
                    goto out;
                }
#endif
#ifdef CONFIG_SNAPPY
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
                    (snappy_compress((char *)buf, s->page_size,
                    (char *)buf_out, &size_out) == SNAPPY_OK) &&
                    (size_out < s->page_size)) {
                pd.flags = cpu_convert_to_target32(
                                        DUMP_DH_COMPRESSED_SNAPPY, endian);
                pd.size  = cpu_convert_to_target32(size_out, endian);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data.\n");
                    goto out;
                }
#endif
            } else {
                /*
                 * fall back to storing the page uncompressed; size_out must
                 * be set to s->page_size
                 */
                pd.flags = cpu_convert_to_target32(0, endian);
                size_out = s->page_size;
                pd.size = cpu_convert_to_target32(size_out, endian);

                ret = write_cache(&page_data, buf, s->page_size, false);
                if (ret < 0) {
                    dump_error(s, "dump: failed to write page data.\n");
                    goto out;
                }
            }

            /* get and write page desc here */
            pd.page_flags = cpu_convert_to_target64(0, endian);
            pd.offset = cpu_convert_to_target64(offset_data, endian);
            offset_data += size_out;

            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
            if (ret < 0) {
                dump_error(s, "dump: failed to write page desc.\n");
                goto out;
            }
        }
    }

    ret = write_cache(&page_desc, NULL, 0, true);
    if (ret < 0) {
        dump_error(s, "dump: failed to sync cache for page_desc.\n");
        goto out;
    }
    ret = write_cache(&page_data, NULL, 0, true);
    if (ret < 0) {
        dump_error(s, "dump: failed to sync cache for page_data.\n");
        goto out;
    }

out:
    free_data_cache(&page_desc);
    free_data_cache(&page_data);

#ifdef CONFIG_LZO
    g_free(wrkmem);
#endif

    g_free(buf_out);

    return ret;
}

static int create_kdump_vmcore(DumpState *s)
{
    int ret;

    /*
     * the kdump-compressed format is:
     *                                               File offset
     *  +------------------------------------------+ 0x0
     *  |    main header (struct disk_dump_header) |
     *  |------------------------------------------+ block 1
     *  |    sub header (struct kdump_sub_header)  |
     *  |------------------------------------------+ block 2
     *  |            1st-dump_bitmap               |
     *  |------------------------------------------+ block 2 + X blocks
     *  |            2nd-dump_bitmap               | (aligned by block)
     *  |------------------------------------------+ block 2 + 2 * X blocks
     *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
     *  |  page desc for pfn 1 (struct page_desc)  |
     *  |                    :                     |
     *  |------------------------------------------| (not aligned by block)
     *  |         page data (pfn 0)                |
     *  |         page data (pfn 1)                |
     *  |                    :                     |
     *  +------------------------------------------+
     */

    ret = write_start_flat_header(s->fd);
    if (ret < 0) {
        dump_error(s, "dump: failed to write start flat header.\n");
        return -1;
    }

    ret = write_dump_header(s);
    if (ret < 0) {
        return -1;
    }

    ret = write_dump_bitmap(s);
    if (ret < 0) {
        return -1;
    }

    ret = write_dump_pages(s);
    if (ret < 0) {
        return -1;
    }

    ret = write_end_flat_header(s->fd);
    if (ret < 0) {
        dump_error(s, "dump: failed to write end flat header.\n");
        return -1;
    }

    dump_completed(s);

    return 0;
}

static ram_addr_t get_start_block(DumpState *s)
{
    GuestPhysBlock *block;

    if (!s->has_filter) {
        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        return 0;
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (block->target_start >= s->begin + s->length ||
            block->target_end <= s->begin) {
            /* This block is out of the range */
            continue;
        }

        s->next_block = block;
        if (s->begin > block->target_start) {
            s->start = s->begin - block->target_start;
        } else {
            s->start = 0;
        }
        return s->start;
    }

    return -1;
}

static void get_max_mapnr(DumpState *s)
{
    GuestPhysBlock *last_block;

    last_block = QTAILQ_LAST(&s->guest_phys_blocks.head, GuestPhysBlockHead);
    s->max_mapnr = paddr_to_pfn(last_block->target_end, s->page_shift);
}
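
/*
 * Illustrative example: with the last block ending at the 4 GiB boundary and
 * a 4 KiB page size (page_shift of 12), max_mapnr is
 * 0x100000000 >> 12 = 0x100000 page frames.
 */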

static int dump_init(DumpState *s, int fd, bool has_format,
                     DumpGuestMemoryFormat format, bool paging, bool has_filter,
                     int64_t begin, int64_t length, Error **errp)
{
    CPUState *cpu;
    int nr_cpus;
    uint64_t tmp;
    Error *err = NULL;
    int ret;

    /* the kdump-compressed format conflicts with paging and filtering */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        assert(!paging && !has_filter);
    }

    if (runstate_is_running()) {
        vm_stop(RUN_STATE_SAVE_VM);
        s->resume = true;
    } else {
        s->resume = false;
    }

    /* If we use KVM, we should synchronize the registers before we get dump
     * info or physmap info.
     */
    cpu_synchronize_all_states();
    nr_cpus = 0;
    CPU_FOREACH(cpu) {
        nr_cpus++;
    }

    s->errp = errp;
    s->fd = fd;
    s->has_filter = has_filter;
    s->begin = begin;
    s->length = length;

    guest_phys_blocks_init(&s->guest_phys_blocks);
    guest_phys_blocks_append(&s->guest_phys_blocks);

    s->start = get_start_block(s);
    if (s->start == -1) {
        error_set(errp, QERR_INVALID_PARAMETER, "begin");
        goto cleanup;
    }

    /* get dump info: endian, class and architecture.
     * If the target architecture is not supported, cpu_get_dump_info() will
     * return -1.
     */
    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
    if (ret < 0) {
        error_set(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    s->note_size = cpu_get_note_size(s->dump_info.d_class,
                                     s->dump_info.d_machine, nr_cpus);
    if (s->note_size < 0) {
        error_set(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    /* get memory mapping */
    memory_mapping_list_init(&s->list);
    if (paging) {
        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            goto cleanup;
        }
    } else {
        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
    }

    s->nr_cpus = nr_cpus;
    s->page_size = TARGET_PAGE_SIZE;
    s->page_shift = ffs(s->page_size) - 1;

    get_max_mapnr(s);

    tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT), s->page_size);
    s->len_dump_bitmap = tmp * s->page_size;

    /* init for kdump-compressed format */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        switch (format) {
        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
            s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
            s->flag_compress = DUMP_DH_COMPRESSED_LZO;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
            s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
            break;

        default:
            s->flag_compress = 0;
        }

        return 0;
    }

    if (s->has_filter) {
        memory_mapping_filter(&s->list, s->begin, s->length);
    }

    /*
     * calculate phdr_num
     *
     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
     */
    s->phdr_num = 1; /* PT_NOTE */
    if (s->list.num < UINT16_MAX - 2) {
        s->phdr_num += s->list.num;
        s->have_section = false;
    } else {
        s->have_section = true;
        s->phdr_num = PN_XNUM;
        s->sh_info = 1; /* PT_NOTE */

        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
        if (s->list.num <= UINT32_MAX - 1) {
            s->sh_info += s->list.num;
        } else {
            s->sh_info = UINT32_MAX;
        }
    }
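
    /*
     * Note (illustrative): PN_XNUM (0xffff) in e_phnum is the standard ELF
     * escape for a program header count that would overflow the 16-bit
     * e_phnum field; the real count is then carried in the sh_info field of
     * the first section header, which write_elf_section() emits.
     */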

    if (s->dump_info.d_class == ELFCLASS64) {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->sh_info +
                               sizeof(Elf64_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
        }
    } else {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->sh_info +
                               sizeof(Elf32_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
        }
    }

    return 0;

cleanup:
    guest_phys_blocks_free(&s->guest_phys_blocks);

    if (s->resume) {
        vm_start();
    }

    return -1;
}
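
/*
 * Example usage (illustrative) from the QMP monitor:
 *
 *   { "execute": "dump-guest-memory",
 *     "arguments": { "paging": false,
 *                    "protocol": "file:/tmp/vmcore",
 *                    "format": "kdump-zlib" } }
 *
 * "protocol" may also be "fd:<name>" for a file descriptor previously added
 * with getfd; "begin"/"length" select a physical-address window and are only
 * valid with the ELF format.
 */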

void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
                           int64_t begin, bool has_length,
                           int64_t length, bool has_format,
                           DumpGuestMemoryFormat format, Error **errp)
{
    const char *p;
    int fd = -1;
    DumpState *s;
    int ret;

    /*
     * The kdump-compressed format needs the whole memory dumped, so paging
     * and filtering are not supported here.
     */
    if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) &&
        (paging || has_begin || has_length)) {
        error_setg(errp, "kdump-compressed format doesn't support paging or "
                         "filter");
        return;
    }
    if (has_begin && !has_length) {
        error_set(errp, QERR_MISSING_PARAMETER, "length");
        return;
    }
    if (!has_begin && has_length) {
        error_set(errp, QERR_MISSING_PARAMETER, "begin");
        return;
    }

    /* check whether lzo/snappy is supported */
#ifndef CONFIG_LZO
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
        error_setg(errp, "kdump-lzo is not available now");
        return;
    }
#endif

#ifndef CONFIG_SNAPPY
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
        error_setg(errp, "kdump-snappy is not available now");
        return;
    }
#endif

#if !defined(WIN32)
    if (strstart(file, "fd:", &p)) {
        fd = monitor_get_fd(cur_mon, p, errp);
        if (fd == -1) {
            return;
        }
    }
#endif

    if (strstart(file, "file:", &p)) {
        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
        if (fd < 0) {
            error_setg_file_open(errp, errno, p);
            return;
        }
    }

    if (fd == -1) {
        error_set(errp, QERR_INVALID_PARAMETER, "protocol");
        return;
    }

    s = g_malloc0(sizeof(DumpState));

    ret = dump_init(s, fd, has_format, format, paging, has_begin,
                    begin, length, errp);
    if (ret < 0) {
        g_free(s);
        return;
    }

    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        if (create_kdump_vmcore(s) < 0 && !error_is_set(s->errp)) {
            error_set(errp, QERR_IO_ERROR);
        }
    } else {
        if (create_vmcore(s) < 0 && !error_is_set(s->errp)) {
            error_set(errp, QERR_IO_ERROR);
        }
    }

    g_free(s);
}

DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
{
    DumpGuestMemoryFormatList *item;
    DumpGuestMemoryCapability *cap =
                                  g_malloc0(sizeof(DumpGuestMemoryCapability));

    /* elf is always available */
    item = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    cap->formats = item;
    item->value = DUMP_GUEST_MEMORY_FORMAT_ELF;

    /* kdump-zlib is always available */
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB;

    /* add new item if kdump-lzo is available */
#ifdef CONFIG_LZO
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO;
#endif

    /* add new item if kdump-snappy is available */
#ifdef CONFIG_SNAPPY
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY;
#endif

    return cap;
}