hw/vhost.c @ 5430a28f

/*
 * vhost support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <sys/ioctl.h>
#include "vhost.h"
#include "hw/hw.h"
#include "range.h"
#include <linux/vhost.h>

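/* Sync the dirty log for the intersection of a memory range [mfirst, mlast]
 * and a region range [rfirst, rlast]: atomically clear each log chunk and
 * mark the corresponding pages dirty in QEMU's bitmap. */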
static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;

    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);
    if (end < start) {
        return;
    }
    for (;from < to; ++from) {
        vhost_log_chunk_t log;
        int bit;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really
         * need the barrier semantics of __sync
         * builtins, but it's easier to use them than
         * roll our own. */
        log = __sync_fetch_and_and(from, 0);
        while ((bit = sizeof(log) > sizeof(int) ?
                ffsll(log) : ffs(log))) {
            bit -= 1;
            cpu_physical_memory_set_dirty(addr + bit * VHOST_LOG_PAGE);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}

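/* Memory client callback: flush dirty-log bits for every memory region and
 * every virtqueue used ring that falls within the requested range. */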
static int vhost_client_sync_dirty_bitmap(CPUPhysMemoryClient *client,
                                          target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    int i;
    if (!dev->log_enabled || !dev->started) {
        return 0;
    }
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        vhost_dev_sync_region(dev, start_addr, end_addr,
                              reg->guest_phys_addr,
                              range_get_last(reg->guest_phys_addr,
                                             reg->memory_size));
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        vhost_dev_sync_region(dev, start_addr, end_addr, vq->used_phys,
                              range_get_last(vq->used_phys, vq->used_size));
    }
    return 0;
}

/* Assign/unassign. Keep an unsorted array of non-overlapping
 * memory regions in dev->mem. */
static void vhost_dev_unassign_memory(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int from, to, n = dev->mem->nregions;
    /* Track overlapping/split regions for sanity checking. */
    int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;

    for (from = 0, to = 0; from < n; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t reglast;
        uint64_t memlast;
        uint64_t change;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }

        /* No overlap is simple */
        if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                            start_addr, size)) {
            continue;
        }

        /* Split only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!split);

        reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        memlast = range_get_last(start_addr, size);

        /* Remove whole region */
        if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
            --dev->mem->nregions;
            --to;
            assert(to >= 0);
            ++overlap_middle;
            continue;
        }

        /* Shrink region */
        if (memlast >= reglast) {
            reg->memory_size = start_addr - reg->guest_phys_addr;
            assert(reg->memory_size);
            assert(!overlap_end);
            ++overlap_end;
            continue;
        }

        /* Shift region */
        if (start_addr <= reg->guest_phys_addr) {
            change = memlast + 1 - reg->guest_phys_addr;
            reg->memory_size -= change;
            reg->guest_phys_addr += change;
            reg->userspace_addr += change;
            assert(reg->memory_size);
            assert(!overlap_start);
            ++overlap_start;
            continue;
        }

        /* This only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!overlap_start);
        assert(!overlap_end);
        assert(!overlap_middle);
        /* Split region: shrink first part, shift second part. */
        memcpy(dev->mem->regions + n, reg, sizeof *reg);
        reg->memory_size = start_addr - reg->guest_phys_addr;
        assert(reg->memory_size);
        change = memlast + 1 - reg->guest_phys_addr;
        reg = dev->mem->regions + n;
        reg->memory_size -= change;
        assert(reg->memory_size);
        reg->guest_phys_addr += change;
        reg->userspace_addr += change;
        /* Never add more than 1 region */
        assert(dev->mem->nregions == n);
        ++dev->mem->nregions;
        ++split;
    }
}

/* Called after unassign, so no regions overlap the given range. */
static void vhost_dev_assign_memory(struct vhost_dev *dev,
                                    uint64_t start_addr,
                                    uint64_t size,
                                    uint64_t uaddr)
{
    int from, to;
    struct vhost_memory_region *merged = NULL;
    for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t prlast, urlast;
        uint64_t pmlast, umlast;
        uint64_t s, e, u;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }
        prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        pmlast = range_get_last(start_addr, size);
        urlast = range_get_last(reg->userspace_addr, reg->memory_size);
        umlast = range_get_last(uaddr, size);

        /* check for overlapping regions: should never happen. */
        assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
        /* Not an adjacent or overlapping region - do not merge. */
        if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
            (pmlast + 1 != reg->guest_phys_addr ||
             umlast + 1 != reg->userspace_addr)) {
            continue;
        }

        if (merged) {
            --to;
            assert(to >= 0);
        } else {
            merged = reg;
        }
        u = MIN(uaddr, reg->userspace_addr);
        s = MIN(start_addr, reg->guest_phys_addr);
        e = MAX(pmlast, prlast);
        uaddr = merged->userspace_addr = u;
        start_addr = merged->guest_phys_addr = s;
        size = merged->memory_size = e - s + 1;
        assert(merged->memory_size);
    }

    if (!merged) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        memset(reg, 0, sizeof *reg);
        reg->memory_size = size;
        assert(reg->memory_size);
        reg->guest_phys_addr = start_addr;
        reg->userspace_addr = uaddr;
        ++to;
    }
    assert(to <= dev->mem->nregions + 1);
    dev->mem->nregions = to;
}

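/* Return the number of log chunks needed to cover all memory regions
 * and all virtqueue used rings. */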
static uint64_t vhost_get_log_size(struct vhost_dev *dev)
{
    uint64_t log_size = 0;
    int i;
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        uint64_t last = range_get_last(reg->guest_phys_addr,
                                       reg->memory_size);
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        uint64_t last = vq->used_phys + vq->used_size - 1;
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    return log_size;
}

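/* Switch the device to a dirty log of the given size: point the kernel at
 * the new buffer, flush what remains in the old one, then free it. */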
static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
{
    vhost_log_chunk_t *log;
    uint64_t log_base;
    int r;
    if (size) {
        log = qemu_mallocz(size * sizeof *log);
    } else {
        log = NULL;
    }
    log_base = (uint64_t)(unsigned long)log;
    r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
    assert(r >= 0);
    vhost_client_sync_dirty_bitmap(&dev->client, 0,
                                   (target_phys_addr_t)~0x0ull);
    if (dev->log) {
        qemu_free(dev->log);
    }
    dev->log = log;
    dev->log_size = size;
}

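/* Check that any virtqueue ring overlapping the changed range is still
 * mapped at the same host address. */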
static int vhost_verify_ring_mappings(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int i;
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        target_phys_addr_t l;
        void *p;

        if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
            continue;
        }
        l = vq->ring_size;
        p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
        if (!p || l != vq->ring_size) {
            fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
            return -ENOMEM;
        }
        if (p != vq->ring) {
            fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
            return -EBUSY;
        }
        cpu_physical_memory_unmap(p, l, 0, 0);
    }
    return 0;
}

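/* Memory client callback: update the vhost memory table for a changed
 * guest-physical range and push it to the kernel, resizing the dirty log
 * around the update when logging is enabled. */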
static void vhost_client_set_memory(CPUPhysMemoryClient *client,
                                    target_phys_addr_t start_addr,
                                    ram_addr_t size,
                                    ram_addr_t phys_offset)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    int s = offsetof(struct vhost_memory, regions) +
        (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
    uint64_t log_size;
    int r;
    dev->mem = qemu_realloc(dev->mem, s);

    assert(size);

    vhost_dev_unassign_memory(dev, start_addr, size);
    if (flags == IO_MEM_RAM) {
        /* Add given mapping, merging adjacent regions if any */
        vhost_dev_assign_memory(dev, start_addr, size,
                                (uintptr_t)qemu_get_ram_ptr(phys_offset));
    } else {
        /* Remove old mapping for this memory, if any. */
        vhost_dev_unassign_memory(dev, start_addr, size);
    }

    if (!dev->started) {
        return;
    }

    if (dev->started) {
        r = vhost_verify_ring_mappings(dev, start_addr, size);
        assert(r >= 0);
    }

    if (!dev->log_enabled) {
        r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
        assert(r >= 0);
        return;
    }
    log_size = vhost_get_log_size(dev);
    /* We allocate an extra 4K bytes to log,
     * to reduce the number of reallocations. */
#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
    /* To log more, must increase log size before table update. */
    if (dev->log_size < log_size) {
        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    }
    r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
    assert(r >= 0);
    /* To log less, can only decrease log size after table update. */
    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
        vhost_dev_log_resize(dev, log_size);
    }
}

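/* Pass the userspace addresses of one virtqueue's descriptor, avail and
 * used rings to the kernel, optionally flagging the ring for dirty logging. */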
static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx, bool enable_log)
{
    struct vhost_vring_addr addr = {
        .index = idx,
        .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
        .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
        .used_user_addr = (uint64_t)(unsigned long)vq->used,
        .log_guest_addr = vq->used_phys,
        .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
    };
    int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
    if (r < 0) {
        return -errno;
    }
    return 0;
}

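/* Push the acked feature bits to the kernel, adding VHOST_F_LOG_ALL when
 * dirty logging is needed. */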
static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
{
    uint64_t features = dev->acked_features;
    int r;
    if (enable_log) {
        features |= 0x1 << VHOST_F_LOG_ALL;
    }
    r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
    return r < 0 ? -errno : 0;
}

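/* Enable or disable dirty logging on the device and every virtqueue,
 * rolling back to the previous state on failure. */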
static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
{
    int r, t, i;
    r = vhost_dev_set_features(dev, enable_log);
    if (r < 0) {
        goto err_features;
    }
    for (i = 0; i < dev->nvqs; ++i) {
        r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     enable_log);
        if (r < 0) {
            goto err_vq;
        }
    }
    return 0;
err_vq:
    for (; i >= 0; --i) {
        t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     dev->log_enabled);
        assert(t >= 0);
    }
    t = vhost_dev_set_features(dev, dev->log_enabled);
    assert(t >= 0);
err_features:
    return r;
}

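/* Memory client callback: start or stop dirty logging (used around
 * migration), allocating or freeing the log buffer as needed. */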
static int vhost_client_migration_log(CPUPhysMemoryClient *client,
                                      int enable)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    int r;
    if (!!enable == dev->log_enabled) {
        return 0;
    }
    if (!dev->started) {
        dev->log_enabled = enable;
        return 0;
    }
    if (!enable) {
        r = vhost_dev_set_log(dev, false);
        if (r < 0) {
            return r;
        }
        if (dev->log) {
            qemu_free(dev->log);
        }
        dev->log = NULL;
        dev->log_size = 0;
    } else {
        vhost_dev_log_resize(dev, vhost_get_log_size(dev));
        r = vhost_dev_set_log(dev, true);
        if (r < 0) {
            return r;
        }
    }
    dev->log_enabled = enable;
    return 0;
}

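/* Set up one virtqueue in the kernel: program the ring size and base index,
 * map the descriptor, avail and used rings plus the whole ring area, and
 * wire up the kick and call eventfds. */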
static int vhost_virtqueue_init(struct vhost_dev *dev,
                                struct VirtIODevice *vdev,
                                struct vhost_virtqueue *vq,
                                unsigned idx)
{
    target_phys_addr_t s, l, a;
    int r;
    struct vhost_vring_file file = {
        .index = idx,
    };
    struct vhost_vring_state state = {
        .index = idx,
    };
    struct VirtQueue *vvq = virtio_get_queue(vdev, idx);

    if (!vdev->binding->set_host_notifier) {
        fprintf(stderr, "binding does not support host notifiers\n");
        return -ENOSYS;
    }

    vq->num = state.num = virtio_queue_get_num(vdev, idx);
    r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
    if (r) {
        return -errno;
    }

    state.num = virtio_queue_get_last_avail_idx(vdev, idx);
    r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
    if (r) {
        return -errno;
    }

    s = l = virtio_queue_get_desc_size(vdev, idx);
    a = virtio_queue_get_desc_addr(vdev, idx);
    vq->desc = cpu_physical_memory_map(a, &l, 0);
    if (!vq->desc || l != s) {
        r = -ENOMEM;
        goto fail_alloc_desc;
    }
    s = l = virtio_queue_get_avail_size(vdev, idx);
    a = virtio_queue_get_avail_addr(vdev, idx);
    vq->avail = cpu_physical_memory_map(a, &l, 0);
    if (!vq->avail || l != s) {
        r = -ENOMEM;
        goto fail_alloc_avail;
    }
    vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
    vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
    vq->used = cpu_physical_memory_map(a, &l, 1);
    if (!vq->used || l != s) {
        r = -ENOMEM;
        goto fail_alloc_used;
    }

    vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
    vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
    vq->ring = cpu_physical_memory_map(a, &l, 1);
    if (!vq->ring || l != s) {
        r = -ENOMEM;
        goto fail_alloc_ring;
    }

    r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
    if (r < 0) {
        r = -errno;
        goto fail_alloc;
    }
    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
    if (r < 0) {
        fprintf(stderr, "Error binding host notifier: %d\n", -r);
        goto fail_host_notifier;
    }

    file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
    r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
    if (r) {
        r = -errno;
        goto fail_kick;
    }

    file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
    if (r) {
        r = -errno;
        goto fail_call;
    }

    return 0;

fail_call:
fail_kick:
    vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
fail_host_notifier:
fail_alloc:
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, 0);
fail_alloc_ring:
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              0, 0);
fail_alloc_used:
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, 0);
fail_alloc_avail:
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, 0);
fail_alloc_desc:
    return r;
}

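/* Tear down one virtqueue: stop the host notifier, read the last avail index
 * back from the kernel into the virtio device, and unmap the rings. */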
static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
                                    struct VirtIODevice *vdev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx)
{
    struct vhost_vring_state state = {
        .index = idx,
    };
    int r;
    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
    if (r < 0) {
        fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
        fflush(stderr);
    }
    assert (r >= 0);
    r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
    if (r < 0) {
        fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
        fflush(stderr);
    }
    virtio_queue_set_last_avail_idx(vdev, idx, state.num);
    assert (r >= 0);
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, virtio_queue_get_ring_size(vdev, idx));
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              1, virtio_queue_get_used_size(vdev, idx));
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, virtio_queue_get_avail_size(vdev, idx));
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, virtio_queue_get_desc_size(vdev, idx));
}

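/* Open (or adopt) the vhost control fd, query its features and register the
 * device as a physical memory client. */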
int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
{
    uint64_t features;
    int r;
    if (devfd >= 0) {
        hdev->control = devfd;
    } else {
        hdev->control = open("/dev/vhost-net", O_RDWR);
        if (hdev->control < 0) {
            return -errno;
        }
    }
    r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
    if (r < 0) {
        goto fail;
    }

    r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
    if (r < 0) {
        goto fail;
    }
    hdev->features = features;

    hdev->client.set_memory = vhost_client_set_memory;
    hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
    hdev->client.migration_log = vhost_client_migration_log;
    hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
    hdev->log = NULL;
    hdev->log_size = 0;
    hdev->log_enabled = false;
    hdev->started = false;
    cpu_register_phys_memory_client(&hdev->client);
    hdev->force = force;
    return 0;
fail:
    r = -errno;
    close(hdev->control);
    return r;
}

void vhost_dev_cleanup(struct vhost_dev *hdev)
{
    cpu_unregister_phys_memory_client(&hdev->client);
    qemu_free(hdev->mem);
    close(hdev->control);
}

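/* Report whether vhost should be used for this device: yes if the binding
 * cannot be queried, if guest notifiers are available, or if vhost was
 * forced. */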
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    return !vdev->binding->query_guest_notifiers ||
        vdev->binding->query_guest_notifiers(vdev->binding_opaque) ||
        hdev->force;
}

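/* Start vhost: bind guest notifiers, program features and the memory table,
 * initialize all virtqueues and, if enabled, the dirty log. */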
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;
    if (!vdev->binding->set_guest_notifiers) {
        fprintf(stderr, "binding does not support guest notifiers\n");
        r = -ENOSYS;
        goto fail;
    }

    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
    if (r < 0) {
        fprintf(stderr, "Error binding guest notifier: %d\n", -r);
        goto fail_notifiers;
    }

    r = vhost_dev_set_features(hdev, hdev->log_enabled);
    if (r < 0) {
        goto fail_features;
    }
    r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
    if (r < 0) {
        r = -errno;
        goto fail_mem;
    }
    for (i = 0; i < hdev->nvqs; ++i) {
        r = vhost_virtqueue_init(hdev,
                                 vdev,
                                 hdev->vqs + i,
                                 i);
        if (r < 0) {
            goto fail_vq;
        }
    }

    if (hdev->log_enabled) {
        hdev->log_size = vhost_get_log_size(hdev);
        hdev->log = hdev->log_size ?
            qemu_mallocz(hdev->log_size * sizeof *hdev->log) : NULL;
        r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
                  (uint64_t)(unsigned long)hdev->log);
        if (r < 0) {
            r = -errno;
            goto fail_log;
        }
    }

    hdev->started = true;

    return 0;
fail_log:
fail_vq:
    while (--i >= 0) {
        vhost_virtqueue_cleanup(hdev,
                                vdev,
                                hdev->vqs + i,
                                i);
    }
fail_mem:
fail_features:
    vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
fail_notifiers:
fail:
    return r;
}

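/* Stop vhost: tear down all virtqueues, flush the dirty log and release the
 * guest notifiers. */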
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;

    for (i = 0; i < hdev->nvqs; ++i) {
        vhost_virtqueue_cleanup(hdev,
                                vdev,
                                hdev->vqs + i,
                                i);
    }
    vhost_client_sync_dirty_bitmap(&hdev->client, 0,
                                   (target_phys_addr_t)~0x0ull);
    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert (r >= 0);

    hdev->started = false;
    qemu_free(hdev->log);
    hdev->log_size = 0;
}