/*
 * Copyright (C) 2010       Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include <sys/mman.h>

#include "hw/pci/pci.h"
#include "hw/i386/pc.h"
#include "hw/xen/xen_common.h"
#include "hw/xen/xen_backend.h"
#include "qmp-commands.h"

#include "sysemu/char.h"
#include "qemu/range.h"
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#include "exec/address-spaces.h"

#include <xen/hvm/ioreq.h>
#include <xen/hvm/params.h>
#include <xen/hvm/e820.h>

//#define DEBUG_XEN

#ifdef DEBUG_XEN
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
static MemoryRegion *framebuffer;
static bool xen_in_migration;

/* Compatibility with older versions of the Xen interface */
#if __XEN_LATEST_INTERFACE_VERSION__ < 0x0003020a
static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
{
    return shared_page->vcpu_iodata[i].vp_eport;
}
static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
{
    return &shared_page->vcpu_iodata[vcpu].vp_ioreq;
}
#  define FMT_ioreq_size PRIx64
#else
static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
{
    return shared_page->vcpu_ioreq[i].vp_eport;
}
static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
{
    return &shared_page->vcpu_ioreq[vcpu];
}
#  define FMT_ioreq_size "u"
#endif
#ifndef HVM_PARAM_BUFIOREQ_EVTCHN
#define HVM_PARAM_BUFIOREQ_EVTCHN 26
#endif

#define BUFFER_IO_MAX_DELAY  100

typedef struct XenPhysmap {
    hwaddr start_addr;
    ram_addr_t size;
    char *name;
    hwaddr phys_offset;

    QLIST_ENTRY(XenPhysmap) list;
} XenPhysmap;

typedef struct XenIOState {
    shared_iopage_t *shared_page;
    buffered_iopage_t *buffered_io_page;
    QEMUTimer *buffered_io_timer;
    /* the evtchn ports for polling notifications (one per vcpu) */
    evtchn_port_t *ioreq_local_port;
    /* evtchn local port for buffered io */
    evtchn_port_t bufioreq_local_port;
    /* the evtchn fd for polling */
    XenEvtchn xce_handle;
    /* which vcpu we are serving */
    int send_vcpu;

    struct xs_handle *xenstore;
    MemoryListener memory_listener;
    QLIST_HEAD(, XenPhysmap) physmap;
    hwaddr free_phys_offset;
    const XenPhysmap *log_for_dirtybit;

    Notifier exit;
    Notifier suspend;
    Notifier wakeup;
} XenIOState;

/* Xen specific function for piix pci */

int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
{
    return irq_num + ((pci_dev->devfn >> 3) << 2);
}
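
/*
 * Worked example (illustrative): the device in slot 2 asserting INTB
 * (irq_num 1) yields 1 + (2 << 2) = 9; xen_piix3_set_irq() below decodes
 * that back into device 9 >> 2 = 2 and intx 9 & 3 = 1.
 */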

void xen_piix3_set_irq(void *opaque, int irq_num, int level)
{
    xc_hvm_set_pci_intx_level(xen_xc, xen_domid, 0, 0, irq_num >> 2,
                              irq_num & 3, level);
}

void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
{
    int i;

    /* Scan for updates to PCI link routes (0x60-0x63). */
    for (i = 0; i < len; i++) {
        uint8_t v = (val >> (8 * i)) & 0xff;
        if (v & 0x80) {
            v = 0;
        }
        v &= 0xf;
        if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
            xc_hvm_set_pci_link_route(xen_xc, xen_domid, address + i - 0x60, v);
        }
    }
}
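
/*
 * Note (PIIX3 background, not derived from this file): 0x60-0x63 are the
 * PIRQA-D routing registers, where a value with bit 7 set means "routing
 * disabled"; the loop above forwards such values to Xen as IRQ 0 and masks
 * the rest down to 4 bits.
 */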

void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
{
    xen_xc_hvm_inject_msi(xen_xc, xen_domid, addr, data);
}

static void xen_suspend_notifier(Notifier *notifier, void *data)
{
    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
}

/* Xen Interrupt Controller */

static void xen_set_irq(void *opaque, int irq, int level)
{
    xc_hvm_set_isa_irq_level(xen_xc, xen_domid, irq, level);
}

qemu_irq *xen_interrupt_controller_init(void)
{
    return qemu_allocate_irqs(xen_set_irq, NULL, 16);
}

/* Memory Ops */

static void xen_ram_init(ram_addr_t ram_size, MemoryRegion **ram_memory_p)
{
    MemoryRegion *sysmem = get_system_memory();
    ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
    ram_addr_t block_len;

    block_len = ram_size;
    if (ram_size >= HVM_BELOW_4G_RAM_END) {
        /* Xen does not allocate the memory contiguously; it keeps a hole of
         * HVM_BELOW_4G_MMIO_LENGTH at HVM_BELOW_4G_MMIO_START.
         */
        block_len += HVM_BELOW_4G_MMIO_LENGTH;
    }
    memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len);
    *ram_memory_p = &ram_memory;
    vmstate_register_ram_global(&ram_memory);

    if (ram_size >= HVM_BELOW_4G_RAM_END) {
        above_4g_mem_size = ram_size - HVM_BELOW_4G_RAM_END;
        below_4g_mem_size = HVM_BELOW_4G_RAM_END;
    } else {
        below_4g_mem_size = ram_size;
    }

    memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
                             &ram_memory, 0, 0xa0000);
    memory_region_add_subregion(sysmem, 0, &ram_640k);
    /* Skip the VGA IO memory space; it will be registered later by the VGA
     * emulated device.
     *
     * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load
     * option ROMs, so it is registered here as RAM.
     */
    memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo",
                             &ram_memory, 0xc0000, below_4g_mem_size - 0xc0000);
    memory_region_add_subregion(sysmem, 0xc0000, &ram_lo);
    if (above_4g_mem_size > 0) {
        memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi",
                                 &ram_memory, 0x100000000ULL,
                                 above_4g_mem_size);
        memory_region_add_subregion(sysmem, 0x100000000ULL, &ram_hi);
    }
}
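
/*
 * Resulting guest memory map (sketch of the regions built above):
 *
 *   0x00000000 - 0x0009ffff   xen.ram.640k (alias into xen.ram)
 *   0x000a0000 - 0x000bffff   VGA hole, registered later by the VGA device
 *   0x000c0000 - below-4G end xen.ram.lo  (alias into xen.ram)
 *   0x100000000 and up        xen.ram.hi  (alias, only when RAM spills past
 *                                          HVM_BELOW_4G_RAM_END)
 */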

void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr)
{
    unsigned long nr_pfn;
    xen_pfn_t *pfn_list;
    int i;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* RAM already populated in Xen */
        fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
                __func__, size, ram_addr);
        return;
    }

    if (mr == &ram_memory) {
        return;
    }

    trace_xen_ram_alloc(ram_addr, size);

    nr_pfn = size >> TARGET_PAGE_BITS;
    pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn);

    for (i = 0; i < nr_pfn; i++) {
        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
    }

    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
        hw_error("xen: failed to populate ram at " RAM_ADDR_FMT, ram_addr);
    }

    g_free(pfn_list);
}

static XenPhysmap *get_physmapping(XenIOState *state,
                                   hwaddr start_addr, ram_addr_t size)
{
    XenPhysmap *physmap = NULL;

    start_addr &= TARGET_PAGE_MASK;

    QLIST_FOREACH(physmap, &state->physmap, list) {
        if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) {
            return physmap;
        }
    }
    return NULL;
}

static hwaddr xen_phys_offset_to_gaddr(hwaddr start_addr,
                                       ram_addr_t size, void *opaque)
{
    hwaddr addr = start_addr & TARGET_PAGE_MASK;
    XenIOState *xen_io_state = opaque;
    XenPhysmap *physmap = NULL;

    QLIST_FOREACH(physmap, &xen_io_state->physmap, list) {
        if (range_covers_byte(physmap->phys_offset, physmap->size, addr)) {
            return physmap->start_addr;
        }
    }

    return start_addr;
}
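
/*
 * The two lookups above are inverses: get_physmapping() goes from a
 * guest-physical address to its XenPhysmap entry, while
 * xen_phys_offset_to_gaddr() maps a QEMU ram_addr_t offset back to the
 * guest-physical address it was remapped to (it is registered with the
 * mapcache in xen_hvm_init() below).
 */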

#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 340
static int xen_add_to_physmap(XenIOState *state,
                              hwaddr start_addr,
                              ram_addr_t size,
                              MemoryRegion *mr,
                              hwaddr offset_within_region)
{
    unsigned long i = 0;
    int rc = 0;
    XenPhysmap *physmap = NULL;
    hwaddr pfn, start_gpfn;
    hwaddr phys_offset = memory_region_get_ram_addr(mr);
    char path[80], value[17];

    if (get_physmapping(state, start_addr, size)) {
        return 0;
    }
    if (size <= 0) {
        return -1;
    }

    /* Xen can only handle a single dirty log region for now and we want
     * the linear framebuffer to be that region.
     * Avoid tracking any regions that are not videoram and avoid tracking
     * the legacy vga region. */
    if (mr == framebuffer && start_addr > 0xbffff) {
        goto go_physmap;
    }
    return -1;

go_physmap:
    DPRINTF("mapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
            start_addr, start_addr + size);

    pfn = phys_offset >> TARGET_PAGE_BITS;
    start_gpfn = start_addr >> TARGET_PAGE_BITS;
    for (i = 0; i < size >> TARGET_PAGE_BITS; i++) {
        unsigned long idx = pfn + i;
        xen_pfn_t gpfn = start_gpfn + i;

        rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn);
        if (rc) {
            DPRINTF("add_to_physmap MFN %"PRI_xen_pfn" to PFN %"
                    PRI_xen_pfn" failed: %d\n", idx, gpfn, rc);
            return -rc;
        }
    }

    physmap = g_malloc(sizeof (XenPhysmap));

    physmap->start_addr = start_addr;
    physmap->size = size;
    physmap->name = (char *)mr->name;
    physmap->phys_offset = phys_offset;

    QLIST_INSERT_HEAD(&state->physmap, physmap, list);

    xc_domain_pin_memory_cacheattr(xen_xc, xen_domid,
                                   start_addr >> TARGET_PAGE_BITS,
                                   (start_addr + size) >> TARGET_PAGE_BITS,
                                   XEN_DOMCTL_MEM_CACHEATTR_WB);

    snprintf(path, sizeof(path),
            "/local/domain/0/device-model/%d/physmap/%"PRIx64"/start_addr",
            xen_domid, (uint64_t)phys_offset);
    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)start_addr);
    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
        return -1;
    }
    snprintf(path, sizeof(path),
            "/local/domain/0/device-model/%d/physmap/%"PRIx64"/size",
            xen_domid, (uint64_t)phys_offset);
    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)size);
    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
        return -1;
    }
    if (mr->name) {
        snprintf(path, sizeof(path),
                "/local/domain/0/device-model/%d/physmap/%"PRIx64"/name",
                xen_domid, (uint64_t)phys_offset);
        if (!xs_write(state->xenstore, 0, path, mr->name, strlen(mr->name))) {
            return -1;
        }
    }

    return 0;
}
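
/*
 * The xs_write() calls above persist each mapping so that
 * xen_read_physmap() can rebuild the list after migration.  Layout of the
 * keys (indexed by the RAM block's phys_offset, values in hex):
 *
 *   /local/domain/0/device-model/<domid>/physmap/<phys_offset>/start_addr
 *   /local/domain/0/device-model/<domid>/physmap/<phys_offset>/size
 *   /local/domain/0/device-model/<domid>/physmap/<phys_offset>/name
 */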

static int xen_remove_from_physmap(XenIOState *state,
                                   hwaddr start_addr,
                                   ram_addr_t size)
{
    unsigned long i = 0;
    int rc = 0;
    XenPhysmap *physmap = NULL;
    hwaddr phys_offset = 0;

    physmap = get_physmapping(state, start_addr, size);
    if (physmap == NULL) {
        return -1;
    }

    phys_offset = physmap->phys_offset;
    size = physmap->size;

    DPRINTF("unmapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx", at "
            "%"HWADDR_PRIx"\n", start_addr, start_addr + size, phys_offset);

    size >>= TARGET_PAGE_BITS;
    start_addr >>= TARGET_PAGE_BITS;
    phys_offset >>= TARGET_PAGE_BITS;
    for (i = 0; i < size; i++) {
        unsigned long idx = start_addr + i;
        xen_pfn_t gpfn = phys_offset + i;

        rc = xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn);
        if (rc) {
            fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"
                    PRI_xen_pfn" failed: %d\n", idx, gpfn, rc);
            return -rc;
        }
    }

    QLIST_REMOVE(physmap, list);
    if (state->log_for_dirtybit == physmap) {
        state->log_for_dirtybit = NULL;
    }
    g_free(physmap);

    return 0;
}

#else
static int xen_add_to_physmap(XenIOState *state,
                              hwaddr start_addr,
                              ram_addr_t size,
                              MemoryRegion *mr,
                              hwaddr offset_within_region)
{
    return -ENOSYS;
}

static int xen_remove_from_physmap(XenIOState *state,
                                   hwaddr start_addr,
                                   ram_addr_t size)
{
    return -ENOSYS;
}
#endif

static void xen_set_memory(struct MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool add)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    bool log_dirty = memory_region_is_logging(section->mr);
    hvmmem_type_t mem_type;

    if (!memory_region_is_ram(section->mr)) {
        return;
    }

    if (!(section->mr != &ram_memory
          && ((log_dirty && add) || (!log_dirty && !add)))) {
        return;
    }

    trace_xen_client_set_memory(start_addr, size, log_dirty);

    start_addr &= TARGET_PAGE_MASK;
    size = TARGET_PAGE_ALIGN(size);

    if (add) {
        if (!memory_region_is_rom(section->mr)) {
            xen_add_to_physmap(state, start_addr, size,
                               section->mr, section->offset_within_region);
        } else {
            mem_type = HVMMEM_ram_ro;
            if (xc_hvm_set_mem_type(xen_xc, xen_domid, mem_type,
                                    start_addr >> TARGET_PAGE_BITS,
                                    size >> TARGET_PAGE_BITS)) {
                DPRINTF("xc_hvm_set_mem_type error, addr: "TARGET_FMT_plx"\n",
                        start_addr);
            }
        }
    } else {
        if (xen_remove_from_physmap(state, start_addr, size) < 0) {
            DPRINTF("physmapping does not exist at "TARGET_FMT_plx"\n", start_addr);
        }
    }
}

static void xen_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    xen_set_memory(listener, section, true);
}

static void xen_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    xen_set_memory(listener, section, false);
    memory_region_unref(section->mr);
}

static void xen_sync_dirty_bitmap(XenIOState *state,
                                  hwaddr start_addr,
                                  ram_addr_t size)
{
    hwaddr npages = size >> TARGET_PAGE_BITS;
    const int width = sizeof(unsigned long) * 8;
    unsigned long bitmap[(npages + width - 1) / width];
    int rc, i, j;
    const XenPhysmap *physmap = NULL;

    physmap = get_physmapping(state, start_addr, size);
    if (physmap == NULL) {
        /* not handled */
        return;
    }

    if (state->log_for_dirtybit == NULL) {
        state->log_for_dirtybit = physmap;
    } else if (state->log_for_dirtybit != physmap) {
        /* Only one range for dirty bitmap can be tracked. */
        return;
    }

    rc = xc_hvm_track_dirty_vram(xen_xc, xen_domid,
                                 start_addr >> TARGET_PAGE_BITS, npages,
                                 bitmap);
    if (rc < 0) {
        if (rc != -ENODATA) {
            memory_region_set_dirty(framebuffer, 0, size);
            DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
                    ", 0x" TARGET_FMT_plx "): %s\n",
                    start_addr, start_addr + size, strerror(-rc));
        }
        return;
    }

    for (i = 0; i < ARRAY_SIZE(bitmap); i++) {
        unsigned long map = bitmap[i];
        while (map != 0) {
            j = ffsl(map) - 1;
            map &= ~(1ul << j);
            memory_region_set_dirty(framebuffer,
                                    (i * width + j) * TARGET_PAGE_SIZE,
                                    TARGET_PAGE_SIZE);
        }
    }
}
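
/*
 * Illustrative walk of the bitmap scan above, assuming 64-bit longs: if
 * bitmap[1] == 0x5, ffsl() yields bits 0 and 2, so pages 64 and 66 of the
 * tracked range (offsets 64 * TARGET_PAGE_SIZE and 66 * TARGET_PAGE_SIZE
 * into the framebuffer region) are marked dirty.
 */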

static void xen_log_start(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    xen_sync_dirty_bitmap(state, section->offset_within_address_space,
                          int128_get64(section->size));
}

static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    state->log_for_dirtybit = NULL;
    /* Disable dirty bit tracking */
    xc_hvm_track_dirty_vram(xen_xc, xen_domid, 0, 0, NULL);
}

static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    xen_sync_dirty_bitmap(state, section->offset_within_address_space,
                          int128_get64(section->size));
}

static void xen_log_global_start(MemoryListener *listener)
{
    if (xen_enabled()) {
        xen_in_migration = true;
    }
}

static void xen_log_global_stop(MemoryListener *listener)
{
    xen_in_migration = false;
}

static MemoryListener xen_memory_listener = {
    .region_add = xen_region_add,
    .region_del = xen_region_del,
    .log_start = xen_log_start,
    .log_stop = xen_log_stop,
    .log_sync = xen_log_sync,
    .log_global_start = xen_log_global_start,
    .log_global_stop = xen_log_global_stop,
    .priority = 10,
};

void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
{
    if (enable) {
        memory_global_dirty_log_start();
    } else {
        memory_global_dirty_log_stop();
    }
}

/* get the ioreq packet from shared memory */
static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
{
    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);

    if (req->state != STATE_IOREQ_READY) {
        DPRINTF("I/O request not ready: "
                "%x, ptr: %x, port: %"PRIx64", "
                "data: %"PRIx64", count: %" FMT_ioreq_size ", size: %" FMT_ioreq_size "\n",
                req->state, req->data_is_ptr, req->addr,
                req->data, req->count, req->size);
        return NULL;
    }

    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */

    req->state = STATE_IOREQ_INPROCESS;
    return req;
}

/*
 * Use poll to get the port notification.
 * Returns the pending ioreq for the vcpu whose event channel fired,
 * or NULL if there is nothing to handle.
 */
static ioreq_t *cpu_get_ioreq(XenIOState *state)
{
    int i;
    evtchn_port_t port;

    port = xc_evtchn_pending(state->xce_handle);
    if (port == state->bufioreq_local_port) {
        timer_mod(state->buffered_io_timer,
                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
        return NULL;
    }

    if (port != -1) {
        for (i = 0; i < max_cpus; i++) {
            if (state->ioreq_local_port[i] == port) {
                break;
            }
        }

        if (i == max_cpus) {
            hw_error("Fatal error while trying to get io event!\n");
        }

        /* unmask the wanted port again */
        xc_evtchn_unmask(state->xce_handle, port);

        /* get the io packet from shared memory */
        state->send_vcpu = i;
        return cpu_get_ioreq_from_shared_memory(state, i);
    }

    /* read error or read nothing */
    return NULL;
}
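
/*
 * Dispatch summary: every vcpu has its own event channel
 * (ioreq_local_port[i]) and the buffered-io path has a separate one
 * (bufioreq_local_port), so the pending port alone tells us whether to
 * service one vcpu's slot in the shared page or to drain the buffered
 * ring on a timer.
 */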

static uint32_t do_inp(pio_addr_t addr, unsigned long size)
{
    switch (size) {
        case 1:
            return cpu_inb(addr);
        case 2:
            return cpu_inw(addr);
        case 4:
            return cpu_inl(addr);
        default:
            hw_error("inp: bad size: %04"FMT_pioaddr" %lx", addr, size);
    }
}

static void do_outp(pio_addr_t addr,
        unsigned long size, uint32_t val)
{
    switch (size) {
        case 1:
            return cpu_outb(addr, val);
        case 2:
            return cpu_outw(addr, val);
        case 4:
            return cpu_outl(addr, val);
        default:
            hw_error("outp: bad size: %04"FMT_pioaddr" %lx", addr, size);
    }
}

/*
 * Helper functions which read/write an object from/to physical guest
 * memory, as part of the implementation of an ioreq.
 *
 * Equivalent to
 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
 *                          val, req->size, 0/1)
 * except without the integer overflow problems.
 */
static void rw_phys_req_item(hwaddr addr,
                             ioreq_t *req, uint32_t i, void *val, int rw)
{
    /* Do everything unsigned so overflow just results in a truncated result
     * and accesses to undesired parts of guest memory, which is up
     * to the guest */
    hwaddr offset = (hwaddr)req->size * i;
    if (req->df) {
        addr -= offset;
    } else {
        addr += offset;
    }
    cpu_physical_memory_rw(addr, val, req->size, rw);
}

static inline void read_phys_req_item(hwaddr addr,
                                      ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 0);
}
static inline void write_phys_req_item(hwaddr addr,
                                       ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 1);
}


static void cpu_ioreq_pio(ioreq_t *req)
{
    uint32_t i;

    if (req->dir == IOREQ_READ) {
        if (!req->data_is_ptr) {
            req->data = do_inp(req->addr, req->size);
        } else {
            uint32_t tmp;

            for (i = 0; i < req->count; i++) {
                tmp = do_inp(req->addr, req->size);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        }
    } else if (req->dir == IOREQ_WRITE) {
        if (!req->data_is_ptr) {
            do_outp(req->addr, req->size, req->data);
        } else {
            for (i = 0; i < req->count; i++) {
                uint32_t tmp = 0;

                read_phys_req_item(req->data, req, i, &tmp);
                do_outp(req->addr, req->size, tmp);
            }
        }
    }
}

static void cpu_ioreq_move(ioreq_t *req)
{
    uint32_t i;

    if (!req->data_is_ptr) {
        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &req->data);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                write_phys_req_item(req->addr, req, i, &req->data);
            }
        }
    } else {
        uint64_t tmp;

        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &tmp);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->data, req, i, &tmp);
                write_phys_req_item(req->addr, req, i, &tmp);
            }
        }
    }
}

static void handle_ioreq(ioreq_t *req)
{
    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
            (req->size < sizeof (target_ulong))) {
        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
    }

    switch (req->type) {
        case IOREQ_TYPE_PIO:
            cpu_ioreq_pio(req);
            break;
        case IOREQ_TYPE_COPY:
            cpu_ioreq_move(req);
            break;
        case IOREQ_TYPE_TIMEOFFSET:
            break;
        case IOREQ_TYPE_INVALIDATE:
            xen_invalidate_map_cache();
            break;
        default:
            hw_error("Invalid ioreq type 0x%x\n", req->type);
    }
}
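
/*
 * The masking at the top of handle_ioreq() trims immediate write data to
 * the request's width: e.g. a 2-byte write carries its payload in the low
 * 16 bits, so req->data &= (1 << 16) - 1 discards anything above them.
 */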

static int handle_buffered_iopage(XenIOState *state)
{
    buf_ioreq_t *buf_req = NULL;
    ioreq_t req;
    int qw;

    if (!state->buffered_io_page) {
        return 0;
    }

    memset(&req, 0x00, sizeof(req));

    while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) {
        buf_req = &state->buffered_io_page->buf_ioreq[
            state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM];
        req.size = 1UL << buf_req->size;
        req.count = 1;
        req.addr = buf_req->addr;
        req.data = buf_req->data;
        req.state = STATE_IOREQ_READY;
        req.dir = buf_req->dir;
        req.df = 1;
        req.type = buf_req->type;
        req.data_is_ptr = 0;
        qw = (req.size == 8);
        if (qw) {
            buf_req = &state->buffered_io_page->buf_ioreq[
                (state->buffered_io_page->read_pointer + 1) % IOREQ_BUFFER_SLOT_NUM];
            req.data |= ((uint64_t)buf_req->data) << 32;
        }

        handle_ioreq(&req);

        xen_mb();
        state->buffered_io_page->read_pointer += qw ? 2 : 1;
    }

    return req.count;
}
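
/*
 * Note on the qw ("quad word") case above: a buf_ioreq_t slot carries only
 * 32 bits of data, so an 8-byte request occupies two consecutive ring
 * slots, the second one supplying bits 63:32; the read pointer then
 * advances by 2 instead of 1.
 */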

static void handle_buffered_io(void *opaque)
{
    XenIOState *state = opaque;

    if (handle_buffered_iopage(state)) {
        timer_mod(state->buffered_io_timer,
                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    } else {
        timer_del(state->buffered_io_timer);
        xc_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
    }
}

static void cpu_handle_ioreq(void *opaque)
{
    XenIOState *state = opaque;
    ioreq_t *req = cpu_get_ioreq(state);

    handle_buffered_iopage(state);
    if (req) {
        handle_ioreq(req);

        if (req->state != STATE_IOREQ_INPROCESS) {
            fprintf(stderr, "Badness in I/O request ... not in service?!: "
                    "%x, ptr: %x, port: %"PRIx64", "
                    "data: %"PRIx64", count: %" FMT_ioreq_size ", size: %" FMT_ioreq_size "\n",
                    req->state, req->data_is_ptr, req->addr,
                    req->data, req->count, req->size);
            destroy_hvm_domain(false);
            return;
        }

        xen_wmb(); /* Update ioreq contents /then/ update state. */

        /*
         * We do this before we send the response so that the tools
         * have the opportunity to pick up on the reset before the
         * guest resumes and does a hlt with interrupts disabled which
         * causes Xen to powerdown the domain.
         */
        if (runstate_is_running()) {
            if (qemu_shutdown_requested_get()) {
                destroy_hvm_domain(false);
            }
            if (qemu_reset_requested_get()) {
                qemu_system_reset(VMRESET_REPORT);
                destroy_hvm_domain(true);
            }
        }

        req->state = STATE_IORESP_READY;
        xc_evtchn_notify(state->xce_handle, state->ioreq_local_port[state->send_vcpu]);
    }
}

static int store_dev_info(int domid, CharDriverState *cs, const char *string)
{
    struct xs_handle *xs = NULL;
    char *path = NULL;
    char *newpath = NULL;
    char *pts = NULL;
    int ret = -1;

    /* Only continue if we're talking to a pty. */
    if (strncmp(cs->filename, "pty:", 4)) {
        return 0;
    }
    pts = cs->filename + 4;

    /* We now have everything we need to set the xenstore entry. */
    xs = xs_open(0);
    if (xs == NULL) {
        fprintf(stderr, "Could not contact XenStore\n");
        goto out;
    }

    path = xs_get_domain_path(xs, domid);
    if (path == NULL) {
        fprintf(stderr, "xs_get_domain_path() error\n");
        goto out;
    }
    newpath = realloc(path, (strlen(path) + strlen(string) +
                strlen("/tty") + 1));
    if (newpath == NULL) {
        fprintf(stderr, "realloc error\n");
        goto out;
    }
    path = newpath;

    strcat(path, string);
    strcat(path, "/tty");
    if (!xs_write(xs, XBT_NULL, path, pts, strlen(pts))) {
        fprintf(stderr, "xs_write for '%s' failed\n", string);
        goto out;
    }
    ret = 0;

out:
    free(path);
    xs_close(xs);

    return ret;
}

void xenstore_store_pv_console_info(int i, CharDriverState *chr)
{
    if (i == 0) {
        store_dev_info(xen_domid, chr, "/console");
    } else {
        char buf[32];
        snprintf(buf, sizeof(buf), "/device/console/%d", i);
        store_dev_info(xen_domid, chr, buf);
    }
}

static void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
{
    char path[50];

    if (xs == NULL) {
        fprintf(stderr, "xenstore connection not initialized\n");
        exit(1);
    }

    snprintf(path, sizeof (path), "device-model/%u/state", xen_domid);
    if (!xs_write(xs, XBT_NULL, path, state, strlen(state))) {
        fprintf(stderr, "error recording dm state\n");
        exit(1);
    }
}

static void xen_main_loop_prepare(XenIOState *state)
{
    int evtchn_fd = -1;

    if (state->xce_handle != XC_HANDLER_INITIAL_VALUE) {
        evtchn_fd = xc_evtchn_fd(state->xce_handle);
    }

    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
                                            state);

    if (evtchn_fd != -1) {
        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
    }
}


/* Initialise Xen */

static void xen_change_state_handler(void *opaque, int running,
                                     RunState state)
{
    if (running) {
        /* record state running */
        xenstore_record_dm_state(xenstore, "running");
    }
}

static void xen_hvm_change_state_handler(void *opaque, int running,
                                         RunState rstate)
{
    XenIOState *xstate = opaque;
    if (running) {
        xen_main_loop_prepare(xstate);
    }
}

static void xen_exit_notifier(Notifier *n, void *data)
{
    XenIOState *state = container_of(n, XenIOState, exit);

    xc_evtchn_close(state->xce_handle);
    xs_daemon_close(state->xenstore);
}

int xen_init(void)
{
    xen_xc = xen_xc_interface_open(0, 0, 0);
    if (xen_xc == XC_HANDLER_INITIAL_VALUE) {
        xen_be_printf(NULL, 0, "can't open xen interface\n");
        return -1;
    }
    qemu_add_vm_change_state_handler(xen_change_state_handler, NULL);

    return 0;
}

static void xen_read_physmap(XenIOState *state)
{
    XenPhysmap *physmap = NULL;
    unsigned int len, num, i;
    char path[80], *value = NULL;
    char **entries = NULL;

    snprintf(path, sizeof(path),
            "/local/domain/0/device-model/%d/physmap", xen_domid);
    entries = xs_directory(state->xenstore, 0, path, &num);
    if (entries == NULL) {
        return;
    }

    for (i = 0; i < num; i++) {
        physmap = g_malloc(sizeof (XenPhysmap));
        physmap->phys_offset = strtoull(entries[i], NULL, 16);
        snprintf(path, sizeof(path),
                "/local/domain/0/device-model/%d/physmap/%s/start_addr",
                xen_domid, entries[i]);
        value = xs_read(state->xenstore, 0, path, &len);
        if (value == NULL) {
            g_free(physmap);
            continue;
        }
        physmap->start_addr = strtoull(value, NULL, 16);
        free(value);

        snprintf(path, sizeof(path),
                "/local/domain/0/device-model/%d/physmap/%s/size",
                xen_domid, entries[i]);
        value = xs_read(state->xenstore, 0, path, &len);
        if (value == NULL) {
            g_free(physmap);
            continue;
        }
        physmap->size = strtoull(value, NULL, 16);
        free(value);

        snprintf(path, sizeof(path),
                "/local/domain/0/device-model/%d/physmap/%s/name",
                xen_domid, entries[i]);
        physmap->name = xs_read(state->xenstore, 0, path, &len);

        QLIST_INSERT_HEAD(&state->physmap, physmap, list);
    }
    free(entries);
}

static void xen_wakeup_notifier(Notifier *notifier, void *data)
{
    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
}

int xen_hvm_init(MemoryRegion **ram_memory)
{
    int i, rc;
    unsigned long ioreq_pfn;
    unsigned long bufioreq_evtchn;
    XenIOState *state;

    state = g_malloc0(sizeof (XenIOState));

    state->xce_handle = xen_xc_evtchn_open(NULL, 0);
    if (state->xce_handle == XC_HANDLER_INITIAL_VALUE) {
        perror("xen: event channel open");
        g_free(state);
        return -errno;
    }

    state->xenstore = xs_daemon_open();
    if (state->xenstore == NULL) {
        perror("xen: xenstore open");
        g_free(state);
        return -errno;
    }

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);

    state->suspend.notify = xen_suspend_notifier;
    qemu_register_suspend_notifier(&state->suspend);

    state->wakeup.notify = xen_wakeup_notifier;
    qemu_register_wakeup_notifier(&state->wakeup);

    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
    DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
    state->shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
                                              PROT_READ|PROT_WRITE, ioreq_pfn);
    if (state->shared_page == NULL) {
        hw_error("map shared IO page returned error %d handle=" XC_INTERFACE_FMT,
                 errno, xen_xc);
    }

    xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
    DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn);
    state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
                                                   PROT_READ|PROT_WRITE, ioreq_pfn);
    if (state->buffered_io_page == NULL) {
        hw_error("map buffered IO page returned error %d", errno);
    }

    state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));

    /* FIXME: how about if we overflow the page here? */
    for (i = 0; i < max_cpus; i++) {
        rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                        xen_vcpu_eport(state->shared_page, i));
        if (rc == -1) {
            fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
            return -1;
        }
        state->ioreq_local_port[i] = rc;
    }

    rc = xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_EVTCHN,
            &bufioreq_evtchn);
    if (rc < 0) {
        fprintf(stderr, "failed to get HVM_PARAM_BUFIOREQ_EVTCHN\n");
        return -1;
    }
    rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid,
            (uint32_t)bufioreq_evtchn);
    if (rc == -1) {
        fprintf(stderr, "bind interdomain ioctl error %d\n", errno);
        return -1;
    }
    state->bufioreq_local_port = rc;

    /* Init RAM management */
    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
    xen_ram_init(ram_size, ram_memory);

    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);

    state->memory_listener = xen_memory_listener;
    QLIST_INIT(&state->physmap);
    memory_listener_register(&state->memory_listener, &address_space_memory);
    state->log_for_dirtybit = NULL;

    /* Initialize backend core & drivers */
    if (xen_be_init() != 0) {
        fprintf(stderr, "%s: xen backend core setup failed\n", __FUNCTION__);
        exit(1);
    }
    xen_be_register("console", &xen_console_ops);
    xen_be_register("vkbd", &xen_kbdmouse_ops);
    xen_be_register("qdisk", &xen_blkdev_ops);
    xen_read_physmap(state);

    return 0;
}

void destroy_hvm_domain(bool reboot)
{
    XenXC xc_handle;
    int sts;

    xc_handle = xen_xc_interface_open(0, 0, 0);
    if (xc_handle == XC_HANDLER_INITIAL_VALUE) {
        fprintf(stderr, "Cannot acquire xenctrl handle\n");
    } else {
        sts = xc_domain_shutdown(xc_handle, xen_domid,
                                 reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff);
        if (sts != 0) {
            fprintf(stderr, "xc_domain_shutdown failed to issue %s, "
                    "sts %d, %s\n", reboot ? "reboot" : "poweroff",
                    sts, strerror(errno));
        } else {
            fprintf(stderr, "Issued domain %d %s\n", xen_domid,
                    reboot ? "reboot" : "poweroff");
        }
        xc_interface_close(xc_handle);
    }
}

void xen_register_framebuffer(MemoryRegion *mr)
{
    framebuffer = mr;
}

void xen_shutdown_fatal_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "Will destroy the domain.\n");
    /* destroy the domain */
    qemu_system_shutdown_request();
}

void xen_modified_memory(ram_addr_t start, ram_addr_t length)
{
    if (unlikely(xen_in_migration)) {
        int rc;
        ram_addr_t start_pfn, nb_pages;

        if (length == 0) {
            length = TARGET_PAGE_SIZE;
        }
        start_pfn = start >> TARGET_PAGE_BITS;
        nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
            - start_pfn;
        rc = xc_hvm_modified_memory(xen_xc, xen_domid, start_pfn, nb_pages);
        if (rc) {
            fprintf(stderr,
                    "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
                    __func__, start, nb_pages, rc, strerror(-rc));
        }
    }
}
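
/*
 * Example of the page rounding in xen_modified_memory(): with 4 KiB pages,
 * start = 0x1800 and length = 0x1000 give start_pfn = 1 and
 * nb_pages = 3 - 1 = 2, covering both pages the byte range touches.
 */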