Statistics
| Branch: | Revision:

root / hw / virtio-net.c @ 7830cf78

History | View | Annotate | Download (41 kB)

1
/*
2
 * Virtio Network Device
3
 *
4
 * Copyright IBM, Corp. 2007
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10
 * the COPYING file in the top-level directory.
11
 *
12
 */
13

    
14
#include "qemu/iov.h"
15
#include "virtio.h"
16
#include "net/net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19
#include "qemu/error-report.h"
20
#include "qemu/timer.h"
21
#include "virtio-net.h"
22
#include "vhost_net.h"
23

    
24
#define VIRTIO_NET_VM_VERSION    11
25

    
26
#define MAC_TABLE_ENTRIES    64
27
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
28

    
29
typedef struct VirtIONetQueue {
30
    VirtQueue *rx_vq;
31
    VirtQueue *tx_vq;
32
    QEMUTimer *tx_timer;
33
    QEMUBH *tx_bh;
34
    int tx_waiting;
35
    struct {
36
        VirtQueueElement elem;
37
        ssize_t len;
38
    } async_tx;
39
    struct VirtIONet *n;
40
} VirtIONetQueue;
41

    
42
typedef struct VirtIONet
43
{
44
    VirtIODevice vdev;
45
    uint8_t mac[ETH_ALEN];
46
    uint16_t status;
47
    VirtIONetQueue *vqs;
48
    VirtQueue *ctrl_vq;
49
    NICState *nic;
50
    uint32_t tx_timeout;
51
    int32_t tx_burst;
52
    uint32_t has_vnet_hdr;
53
    size_t host_hdr_len;
54
    size_t guest_hdr_len;
55
    uint8_t has_ufo;
56
    int mergeable_rx_bufs;
57
    uint8_t promisc;
58
    uint8_t allmulti;
59
    uint8_t alluni;
60
    uint8_t nomulti;
61
    uint8_t nouni;
62
    uint8_t nobcast;
63
    uint8_t vhost_started;
64
    struct {
65
        int in_use;
66
        int first_multi;
67
        uint8_t multi_overflow;
68
        uint8_t uni_overflow;
69
        uint8_t *macs;
70
    } mac_table;
71
    uint32_t *vlans;
72
    DeviceState *qdev;
73
    int multiqueue;
74
    uint16_t max_queues;
75
    uint16_t curr_queues;
76
    size_t config_size;
77
} VirtIONet;
78

    
79
/*
 * Offset of the first byte past 'field' inside 'container', i.e. the number
 * of bytes needed to hold the structure up to and including that field.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))
85

    
86
/* Associates a feature bit mask with a size of the config structure. */
typedef struct VirtIOFeature {
    uint32_t flags;     /* feature bit mask */
    size_t end;         /* endof() of the last config field tied to the feature */
} VirtIOFeature;
90

    
91
static VirtIOFeature feature_sizes[] = {
92
    {.flags = 1 << VIRTIO_NET_F_MAC,
93
     .end = endof(struct virtio_net_config, mac)},
94
    {.flags = 1 << VIRTIO_NET_F_STATUS,
95
     .end = endof(struct virtio_net_config, status)},
96
    {.flags = 1 << VIRTIO_NET_F_MQ,
97
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
98
    {}
99
};
100

    
101
/* Return the per-queue state that belongs to net client 'nc'. */
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);
    VirtIONetQueue *queues = dev->vqs;

    return queues + nc->queue_index;
}
107

    
108
/* Convert a virtqueue index into a queue-pair index (two virtqueues per pair). */
static int vq2q(int queue_index)
{
    const int vqs_per_pair = 2;

    return queue_index / vqs_per_pair;
}
112

    
113
/* TODO
114
 * - we could suppress RX interrupt if we were so inclined.
115
 */
116

    
117
/*
 * Recover the VirtIONet from its VirtIODevice.  Valid because 'vdev' is the
 * first member of VirtIONet.
 */
static VirtIONet *to_virtio_net(VirtIODevice *vdev)
{
    VirtIONet *n = (VirtIONet *)vdev;

    return n;
}
121

    
122
/*
 * Copy the first n->config_size bytes of the device configuration (link
 * status, maximum number of queue pairs, MAC address) into 'config'.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    /*
     * Start from an all-zero structure so that no uninitialised stack
     * bytes can ever be copied out to the guest.
     */
    struct virtio_net_config netcfg = {};

    stw_p(&netcfg.status, n->status);
    stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, n->config_size);
}
132

    
133
/*
 * Accept a configuration write.  Only the MAC address is taken over, and
 * only while the guest has not negotiated VIRTIO_NET_F_CTRL_MAC_ADDR.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_config netcfg = {};
    int ctrl_mac_negotiated;

    memcpy(&netcfg, config, n->config_size);

    ctrl_mac_negotiated =
        (n->vdev.guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR) & 1;
    if (ctrl_mac_negotiated) {
        return;
    }

    if (memcmp(netcfg.mac, n->mac, ETH_ALEN) == 0) {
        /* unchanged address, nothing to do */
        return;
    }

    memcpy(n->mac, netcfg.mac, ETH_ALEN);
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
}
146

    
147
/*
 * True when, for the given device 'status', the driver is ready, the link
 * is up and the VM is running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return n->vdev.vm_running;
}
152

    
153
/*
 * Start or stop vhost-net so that it follows the device 'status'.  Does
 * nothing unless the peer is a tap backend that has a vhost-net instance.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;
    int r;

    if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return;
    }

    if (!tap_get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the wanted state and the peer link is up: nothing to do. */
    if (!!n->vhost_started == virtio_net_started(n, status) &&
        !nc->peer->link_down) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(&n->vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
        return;
    }

    if (!vhost_net_query(tap_get_vhost_net(nc->peer), &n->vdev)) {
        return;
    }

    /* Flag is raised before the start call and dropped again on failure. */
    n->vhost_started = 1;
    r = vhost_net_start(&n->vdev, n->nic->ncs, queues);
    if (r < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -r);
        n->vhost_started = 0;
    }
}
190

    
191
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
192
{
193
    VirtIONet *n = to_virtio_net(vdev);
194
    VirtIONetQueue *q;
195
    int i;
196
    uint8_t queue_status;
197

    
198
    virtio_net_vhost_status(n, status);
199

    
200
    for (i = 0; i < n->max_queues; i++) {
201
        q = &n->vqs[i];
202

    
203
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
204
            queue_status = 0;
205
        } else {
206
            queue_status = status;
207
        }
208

    
209
        if (!q->tx_waiting) {
210
            continue;
211
        }
212

    
213
        if (virtio_net_started(n, queue_status) && !n->vhost_started) {
214
            if (q->tx_timer) {
215
                qemu_mod_timer(q->tx_timer,
216
                               qemu_get_clock_ns(vm_clock) + n->tx_timeout);
217
            } else {
218
                qemu_bh_schedule(q->tx_bh);
219
            }
220
        } else {
221
            if (q->tx_timer) {
222
                qemu_del_timer(q->tx_timer);
223
            } else {
224
                qemu_bh_cancel(q->tx_bh);
225
            }
226
        }
227
    }
228
}
229

    
230
/*
 * Mirror the link state of 'nc' into the config status word, notify the
 * guest when it changed, and re-evaluate the device status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    const uint16_t previous = n->status;

    if (!nc->link_down) {
        n->status |= VIRTIO_NET_S_LINK_UP;
    } else {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    }

    if (previous != n->status) {
        virtio_notify_config(&n->vdev);
    }

    virtio_net_set_status(&n->vdev, n->vdev.status);
}
245

    
246
/*
 * Device reset: restore the default RX mode, a single active queue pair,
 * empty MAC/VLAN filters and the configured MAC address.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Compatibility mode: promiscuous, every other RX mode flag cleared. */
    n->promisc = 1;
    n->allmulti = n->alluni = 0;
    n->nomulti = n->nouni = n->nobcast = 0;

    /* Multiqueue is off until the guest enables it again. */
    n->curr_queues = 1;

    /* Drop all MAC filter table state. */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Back to the MAC address from the NIC configuration. */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));

    /* Clear the VLAN bitmap: MAX_VLAN bits, i.e. MAX_VLAN / 8 bytes. */
    memset(n->vlans, 0, MAX_VLAN >> 3);
}
269

    
270
/* Record in n->has_vnet_hdr whether a tap peer provides a vnet header. */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        /* no peer, or not a tap backend: leave the flag untouched */
        return;
    }

    n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
}
283

    
284
/* Return the cached vnet-header capability of the peer. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    int present = n->has_vnet_hdr;

    return present;
}
288

    
289
/*
 * Query the peer for UFO support and cache the answer in n->has_ufo.
 * Without a vnet header the peer is not asked and 0 is returned.
 */
static int peer_has_ufo(VirtIONet *n)
{
    NetClientState *peer;

    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    peer = qemu_get_queue(n->nic)->peer;
    n->has_ufo = tap_has_ufo(peer);

    return n->has_ufo;
}
298

    
299
/*
 * Switch mergeable RX buffers on or off and derive the guest header length
 * from it.  Where the peer supports that header length, the peer is told to
 * use it and the host header length is set to match.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
{
    int idx;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (n->mergeable_rx_bufs) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr);
    }

    for (idx = 0; idx < n->max_queues; idx++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, idx);

        if (!peer_has_vnet_hdr(n)) {
            continue;
        }
        if (!tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            continue;
        }

        tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
        n->host_hdr_len = n->guest_hdr_len;
    }
}
319

    
320
/*
 * Enable the tap peer of queue 'index'.  Returns 0 when there is no peer or
 * the peer is not a tap backend, otherwise the result of tap_enable().
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
        return tap_enable(nc->peer);
    }

    return 0;
}
334

    
335
/*
 * Disable the tap peer of queue 'index'.  Returns 0 when there is no peer or
 * the peer is not a tap backend, otherwise the result of tap_disable().
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
        return tap_disable(nc->peer);
    }

    return 0;
}
349

    
350
/*
 * Attach the peers of the first n->curr_queues queue pairs and detach the
 * peers of the remaining ones.
 */
static void virtio_net_set_queues(VirtIONet *n)
{
    int i;

    for (i = 0; i < n->max_queues; i++) {
        int r;

        /*
         * The attach/detach call must live outside assert(): with NDEBUG
         * defined the assert expression is not evaluated at all.
         */
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
        } else {
            r = peer_detach(n, i);
        }

        assert(!r);
        (void)r; /* silence "set but unused" in NDEBUG builds */
    }
}
362

    
363
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
364

    
365
/*
 * Compute the feature bits offered to the guest: MAC is always set, offload
 * bits are removed when the peer cannot support them, and a vhost-net
 * backend (if any) gets the last word.
 */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    /* Offloads that need a vnet header on the peer. */
    const uint32_t vnet_hdr_offloads =
        (0x1 << VIRTIO_NET_F_CSUM) |
        (0x1 << VIRTIO_NET_F_HOST_TSO4) |
        (0x1 << VIRTIO_NET_F_HOST_TSO6) |
        (0x1 << VIRTIO_NET_F_HOST_ECN) |
        (0x1 << VIRTIO_NET_F_GUEST_CSUM) |
        (0x1 << VIRTIO_NET_F_GUEST_TSO4) |
        (0x1 << VIRTIO_NET_F_GUEST_TSO6) |
        (0x1 << VIRTIO_NET_F_GUEST_ECN);
    const uint32_t ufo_offloads =
        (0x1 << VIRTIO_NET_F_GUEST_UFO) |
        (0x1 << VIRTIO_NET_F_HOST_UFO);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        features &= ~vnet_hdr_offloads;
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~ufo_offloads;
    }

    if (nc->peer &&
        nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP &&
        tap_get_vhost_net(nc->peer)) {
        return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
    }

    return features;
}
397

    
398
/*
 * Feature set returned for the "bad features" query: Linux kernel 2.6.25
 * understood MAC (as everyone must) plus CSUM, HOST_TSO4, HOST_TSO6 and
 * HOST_ECN.
 */
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    const uint32_t features =
        (1 << VIRTIO_NET_F_MAC) |
        (1 << VIRTIO_NET_F_CSUM) |
        (1 << VIRTIO_NET_F_HOST_TSO4) |
        (1 << VIRTIO_NET_F_HOST_TSO6) |
        (1 << VIRTIO_NET_F_HOST_ECN);

    return features;
}
412

    
413
/*
 * Apply the feature set acknowledged by the guest: multiqueue mode,
 * mergeable RX buffers, tap offloads, and vhost-net feature acknowledgement
 * for every queue that has a vhost-net backend.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    const int want_mq = !!(features & (1 << VIRTIO_NET_F_MQ));
    const int want_ctrl = !!(features & (1 << VIRTIO_NET_F_CTRL_VQ));
    const int want_mrg = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
    int idx;

    virtio_net_set_multiqueue(n, want_mq, want_ctrl);
    virtio_net_set_mrg_rx_bufs(n, want_mrg);

    if (n->has_vnet_hdr) {
        NetClientState *peer0 = qemu_get_subqueue(n->nic, 0)->peer;

        tap_set_offload(peer0,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }

    for (idx = 0; idx < n->max_queues; idx++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, idx);

        if (nc->peer &&
            nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP &&
            tap_get_vhost_net(nc->peer)) {
            vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
        }
    }
}
444

    
445
/*
 * Control command of class VIRTIO_NET_CTRL_RX: read a one-byte on/off value
 * from 'iov' and store it in the RX mode flag selected by 'cmd'.
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR for a short payload or an
 * unknown command.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;

    if (iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)) != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}
474

    
475
/*
 * Control commands of class VIRTIO_NET_CTRL_MAC.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the payload is exactly one MAC address,
 * which becomes the device address.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: the payload is a 32-bit entry count
 * followed by that many unicast addresses, then a second 32-bit count
 * followed by that many multicast addresses.  The filter table is cleared
 * first; unicast entries are stored from index 0, multicast entries are
 * appended behind them (first_multi marks the boundary).  A list that does
 * not fit only sets the matching overflow flag.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR on a malformed request.  Note
 * that the table stays cleared when TABLE_SET fails part-way.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    size_t len;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.uni_overflow = 0;
    n->mac_table.multi_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* --- unicast list --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    /* Convert only after the read is known to be complete. */
    mac_data.entries = ldl_p(&mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /* Division instead of multiplication: cannot wrap for huge counts. */
    if (mac_data.entries > iov_size(iov, iov_cnt) / ETH_ALEN) {
        return VIRTIO_NET_ERR;
    }
    len = (size_t)mac_data.entries * ETH_ALEN;

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs, len);
        if (s != len) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, len);

    n->mac_table.first_multi = n->mac_table.in_use;

    /* --- multicast list --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    mac_data.entries = ldl_p(&mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast list must account for all remaining payload bytes. */
    if (iov_size(iov, iov_cnt) % ETH_ALEN != 0 ||
        mac_data.entries != iov_size(iov, iov_cnt) / ETH_ALEN) {
        return VIRTIO_NET_ERR;
    }
    len = (size_t)mac_data.entries * ETH_ALEN;

    /*
     * Compare against the free slots (in_use never exceeds
     * MAC_TABLE_ENTRIES here) so the check cannot wrap, and store the
     * entries behind the unicast ones rather than on top of them.
     */
    if (mac_data.entries <=
        (uint32_t)(MAC_TABLE_ENTRIES - n->mac_table.in_use)) {
        s = iov_to_buf(iov, iov_cnt, 0,
                       &n->mac_table.macs[n->mac_table.in_use * ETH_ALEN],
                       len);
        if (s != len) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.multi_overflow = 1;
    }

    return VIRTIO_NET_OK;
}
554

    
555
/*
 * Control commands of class VIRTIO_NET_CTRL_VLAN: read a 16-bit VLAN id
 * from 'iov' and set (ADD) or clear (DEL) its bit in the VLAN bitmap.
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR for a short payload, an id
 * >= MAX_VLAN or an unknown command.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    uint16_t vid;
    size_t s;

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    /* Convert only once 'vid' is known to be fully read. */
    vid = lduw_p(&vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* 32 ids per bitmap word: word index vid / 32, bit index vid % 32. */
    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}
579

    
580
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
581
                                VirtQueueElement *elem)
582
{
583
    struct virtio_net_ctrl_mq s;
584

    
585
    if (elem->out_num != 2 ||
586
        elem->out_sg[1].iov_len != sizeof(struct virtio_net_ctrl_mq)) {
587
        error_report("virtio-net ctrl invalid steering command");
588
        return VIRTIO_NET_ERR;
589
    }
590

    
591
    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
592
        return VIRTIO_NET_ERR;
593
    }
594

    
595
    memcpy(&s, elem->out_sg[1].iov_base, sizeof(struct virtio_net_ctrl_mq));
596

    
597
    if (s.virtqueue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
598
        s.virtqueue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
599
        s.virtqueue_pairs > n->max_queues ||
600
        !n->multiqueue) {
601
        return VIRTIO_NET_ERR;
602
    }
603

    
604
    n->curr_queues = s.virtqueue_pairs;
605
    /* stop the backend before changing the number of queues to avoid handling a
606
     * disabled queue */
607
    virtio_net_set_status(&n->vdev, n->vdev.status);
608
    virtio_net_set_queues(n);
609

    
610
    return VIRTIO_NET_OK;
611
}
612
/*
 * Control virtqueue handler: pop every pending request, dispatch it on the
 * class in its header, and write a one-byte ack status back to the guest.
 * A request too small to hold the header or the status terminates QEMU.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status;
    VirtQueueElement elem;
    size_t s;
    struct iovec *iov;
    unsigned int iov_cnt;

    while (virtqueue_pop(vq, &elem)) {
        /*
         * Every request starts out as failed.  Without this reset, a
         * request with an unknown class would inherit the status of the
         * request processed before it.
         */
        status = VIRTIO_NET_ERR;

        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        iov = elem.out_sg;
        iov_cnt = elem.out_num;
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        /* Handlers see only the payload that follows the header. */
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, &elem);
        }

        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}
652

    
653
/* RX */
654

    
655
/*
 * RX virtqueue kick handler: flush the packets queued for the net client
 * that corresponds to this virtqueue's queue pair.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    int pair = vq2q(virtio_get_queue_index(vq));
    NetClientState *nc = qemu_get_subqueue(n->nic, pair);

    qemu_flush_queued_packets(nc);
}
662

    
663
/*
 * Return 1 when the queue behind 'nc' may receive packets: the VM is
 * running, the queue is among the active ones, its RX virtqueue is ready
 * and the driver has set DRIVER_OK.  Return 0 otherwise.
 */
static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!n->vdev.vm_running || nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq)) {
        return 0;
    }

    return (n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) ? 1 : 0;
}
683

    
684
/*
 * Non-zero when the RX virtqueue of 'q' is empty or, with mergeable RX
 * buffers, does not offer 'bufsize' bytes.
 */
static int virtio_net_rx_short_of_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;

    return virtio_queue_empty(q->rx_vq) ||
           (n->mergeable_rx_bufs &&
            !virtqueue_avail_bytes(q->rx_vq, bufsize, 0));
}

/*
 * Check whether the guest has provided enough RX buffers for a packet of
 * 'bufsize' bytes.  Returns 1 (guest notification disabled) when it has,
 * 0 (guest notification left enabled) when it has not.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    if (virtio_net_rx_short_of_buffers(q, bufsize)) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /*
         * The guest may have added buffers between the first check and
         * the moment notification was enabled, so look once more before
         * giving up.
         */
        if (virtio_net_rx_short_of_buffers(q, bufsize)) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
706

    
707
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
708
 * it never finds out that the packets don't have valid checksums.  This
709
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
710
 * fix this with Xen but it hasn't appeared in an upstream release of
711
 * dhclient yet.
712
 *
713
 * To avoid breaking existing guests, we catch udp packets and add
714
 * checksums.  This is terrible but it's better than hacking the guest
715
 * kernels.
716
 *
717
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
718
 * we should provide a mechanism to disable it to avoid polluting the host
719
 * cache.
720
 */
721
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
722
                                        uint8_t *buf, size_t size)
723
{
724
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
725
        (size > 27 && size < 1500) && /* normal sized MTU */
726
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
727
        (buf[23] == 17) && /* ip.protocol == UDP */
728
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
729
        net_checksum_calculate(buf, size);
730
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
731
    }
732
}
733

    
734
/*
 * Write the virtio-net header for a received packet into the guest buffers
 * 'iov'.  With a vnet header on the host side, the header at the start of
 * 'buf' is passed through (after the dhclient checksum workaround);
 * otherwise a header with no flags and GSO type NONE is generated.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    struct virtio_net_hdr blank = {
        .flags = 0,
        .gso_type = VIRTIO_NET_HDR_GSO_NONE
    };

    if (!n->has_vnet_hdr) {
        iov_from_buf(iov, iov_cnt, 0, &blank, sizeof blank);
        return;
    }

    /* FIXME: the workaround writes through a buffer received as const. */
    work_around_broken_dhclient((void *)buf,
                                (uint8_t *)buf + n->host_hdr_len,
                                size - n->host_hdr_len);
    iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
}
751

    
752
/*
 * Decide whether a received packet is delivered to the guest.
 *
 * 'buf' holds the host header (n->host_hdr_len bytes) followed by the
 * Ethernet frame.  Returns 1 to accept the packet and 0 to drop it, based on
 * promiscuous mode, the VLAN bitmap, the RX mode flags and the MAC filter
 * table.
 *
 * NOTE(review): 'size' is not checked against the offsets read below;
 * callers are presumably expected to pass at least a full Ethernet header.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    const uint8_t *ptr = buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        /*
         * Big-endian 16-bit tag read byte by byte: 'ptr + 14' has no
         * alignment guarantee, so it must not be dereferenced as uint16_t.
         */
        int vid = ((ptr[14] << 8) | ptr[15]) & 0xfff;

        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast (group bit set) */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* Multicast entries occupy [first_multi, in_use). */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        /* Unicast entries occupy [0, first_multi). */
        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
802

    
803
/*
 * Deliver one packet from the backend to the guest.
 *
 * Returns -1 when the queue cannot receive (or no element is available for
 * the first buffer), 0 when the guest has not provided enough buffers, and
 * 'size' when the packet was delivered, filtered out, or dropped because it
 * did not fit a single buffer without mergeable RX buffers.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset = 0;      /* bytes of 'buf' consumed so far */
    size_t i = 0;           /* guest buffers filled so far */
    size_t guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    while (offset < size) {
        VirtQueueElement elem;
        const struct iovec *sg = elem.in_sg;
        int total = 0;
        int len;

        if (virtqueue_pop(q->rx_vq, &elem) == 0) {
            if (i == 0) {
                return -1;
            }
            error_report("virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                    i, n->mergeable_rx_bufs, offset, size,
                    n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        guest_offset = 0;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers lives in the first buffer; it
                 * is filled in once the final buffer count is known.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem.in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem.in_num, buf, size);
            /* Skip the host header in 'buf', the guest header in 'sg'. */
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;

        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, &elem, total, i++);
    }

    if (mhdr_cnt) {
        stw_p(&mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(&n->vdev, q->rx_vq);

    return size;
}
901

    
902
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
903

    
904
/*
 * Completion callback for an asynchronously sent packet: return the element
 * to the guest, clear the pending-send state, re-enable TX notification and
 * continue flushing the TX queue.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
    virtio_notify(&n->vdev, q->tx_vq);

    /* out_num == 0 marks "no asynchronous send in flight". */
    q->async_tx.len = 0;
    q->async_tx.elem.out_num = 0;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
917

    
918
/* TX */
919
/*
 * Send pending packets from the TX virtqueue of 'q' to the backend.
 *
 * Returns the number of packets sent (stopping once n->tx_burst is reached),
 * 0 when the driver is not ready or an asynchronous send is still in flight,
 * or -EBUSY when the backend took a packet asynchronously.
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtQueueElement elem;
    int32_t sent = 0;
    int pair = vq2q(virtio_get_queue_index(q->tx_vq));

    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return sent;
    }

    assert(n->vdev.vm_running);

    if (q->async_tx.elem.out_num) {
        /* An earlier packet is still in flight; wait for its completion. */
        virtio_queue_set_notification(q->tx_vq, 0);
        return sent;
    }

    while (virtqueue_pop(q->tx_vq, &elem)) {
        struct iovec sg[VIRTQUEUE_MAX_SIZE];
        struct iovec *out_sg = &elem.out_sg[0];
        unsigned int out_num = elem.out_num;
        ssize_t ret, len;

        if (out_num < 1) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            /* Leading host_hdr_len bytes of the header ... */
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);

            /* ... followed by everything after the guest header. */
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_sg = sg;
            out_num = sg_num;
        }

        len = n->guest_hdr_len;

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, pair),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Keep the element until virtio_net_tx_complete() runs. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            q->async_tx.len  = len;
            return -EBUSY;
        }

        len += ret;

        virtqueue_push(q->tx_vq, &elem, 0);
        virtio_notify(&n->vdev, q->tx_vq);

        sent++;
        if (sent >= n->tx_burst) {
            break;
        }
    }

    return sent;
}
986

    
987
/*
 * TX virtqueue kick handler used when the queue is in timer mode.
 *
 * The first kick arms the queue's tx_timer and turns guest notifications
 * off; a kick that arrives while the queue is already waiting cancels the
 * timer, re-enables notifications and flushes immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *dev = to_virtio_net(vdev);
    VirtIONetQueue *txq = &dev->vqs[vq2q(virtio_get_queue_index(vq))];

    if (!dev->vdev.vm_running) {
        /* The device was stopped but the VCPU was not: only record the
         * pending work. */
        txq->tx_waiting = 1;
        return;
    }

    if (!txq->tx_waiting) {
        /* Nothing pending yet: defer the flush until the timer expires. */
        qemu_mod_timer(txq->tx_timer,
                       qemu_get_clock_ns(vm_clock) + dev->tx_timeout);
        txq->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
        return;
    }

    /* Already waiting on the timer: stop waiting and flush right away. */
    virtio_queue_set_notification(vq, 1);
    qemu_del_timer(txq->tx_timer);
    txq->tx_waiting = 0;
    virtio_net_flush_tx(txq);
}
1010

    
1011
/*
 * TX virtqueue kick handler used when the queue is in bottom-half mode.
 *
 * Marks the queue as waiting and, if the VM is running, disables further
 * guest notifications and schedules the queue's bottom half.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *dev = to_virtio_net(vdev);
    VirtIONetQueue *txq = &dev->vqs[vq2q(virtio_get_queue_index(vq))];

    /* A flush is already pending for this queue; nothing more to do. */
    if (unlikely(txq->tx_waiting)) {
        return;
    }
    txq->tx_waiting = 1;

    /* When the device was stopped but the VCPU was not, only the
     * tx_waiting flag is recorded; no bottom half is scheduled. */
    if (dev->vdev.vm_running) {
        virtio_queue_set_notification(vq, 0);
        qemu_bh_schedule(txq->tx_bh);
    }
}
1027

    
1028
static void virtio_net_tx_timer(void *opaque)
1029
{
1030
    VirtIONetQueue *q = opaque;
1031
    VirtIONet *n = q->n;
1032
    assert(n->vdev.vm_running);
1033

    
1034
    q->tx_waiting = 0;
1035

    
1036
    /* Just in case the driver is not ready on more */
1037
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
1038
        return;
1039

    
1040
    virtio_queue_set_notification(q->tx_vq, 1);
1041
    virtio_net_flush_tx(q);
1042
}
1043

    
1044
static void virtio_net_tx_bh(void *opaque)
1045
{
1046
    VirtIONetQueue *q = opaque;
1047
    VirtIONet *n = q->n;
1048
    int32_t ret;
1049

    
1050
    assert(n->vdev.vm_running);
1051

    
1052
    q->tx_waiting = 0;
1053

    
1054
    /* Just in case the driver is not ready on more */
1055
    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
1056
        return;
1057

    
1058
    ret = virtio_net_flush_tx(q);
1059
    if (ret == -EBUSY) {
1060
        return; /* Notification re-enable handled by tx_complete */
1061
    }
1062

    
1063
    /* If we flush a full burst of packets, assume there are
1064
     * more coming and immediately reschedule */
1065
    if (ret >= n->tx_burst) {
1066
        qemu_bh_schedule(q->tx_bh);
1067
        q->tx_waiting = 1;
1068
        return;
1069
    }
1070

    
1071
    /* If less than a full burst, re-enable notification and flush
1072
     * anything that may have come in while we weren't looking.  If
1073
     * we find something, assume the guest is still active and reschedule */
1074
    virtio_queue_set_notification(q->tx_vq, 1);
1075
    if (virtio_net_flush_tx(q) > 0) {
1076
        virtio_queue_set_notification(q->tx_vq, 0);
1077
        qemu_bh_schedule(q->tx_bh);
1078
        q->tx_waiting = 1;
1079
    }
1080
}
1081

    
1082
/*
 * Rebuild the virtqueue layout for single-queue or multiqueue operation.
 *
 * @n:          the virtio-net device
 * @multiqueue: non-zero to create rx/tx pairs for all n->max_queues queues,
 *              zero to keep only the first pair
 * @ctrl:       non-zero to re-create the control virtqueue
 *
 * All virtqueues with index >= 2 are deleted and then re-added in order:
 * rx/tx for queue pairs 1..max-1, followed by the control queue if
 * requested.  Queue pair 0 is left untouched.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
{
    VirtIODevice *vdev = &n->vdev;
    int i, max = multiqueue ? n->max_queues : 1;
    /*
     * Queue pair 0 is set up by virtio_net_init() with either a timer or a
     * bottom half, so it tells us which TX mode was configured.  Testing
     * n->vqs[i].tx_timer instead would always see NULL on a freshly
     * zero-allocated entry and silently fall back to bottom-half mode.
     */
    bool use_timer = n->vqs[0].tx_timer != NULL;

    n->multiqueue = multiqueue;

    for (i = 2; i <= n->max_queues * 2 + 1; i++) {
        virtio_del_queue(vdev, i);
    }

    for (i = 1; i < max; i++) {
        VirtIONetQueue *q = &n->vqs[i];

        q->rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
        if (use_timer) {
            q->tx_vq = virtio_add_queue(vdev, 256,
                                        virtio_net_handle_tx_timer);
            /* Reuse a timer left from an earlier call rather than
             * leaking it by overwriting the pointer. */
            if (!q->tx_timer) {
                q->tx_timer = qemu_new_timer_ns(vm_clock,
                                                virtio_net_tx_timer, q);
            }
        } else {
            q->tx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
            /* Same for the bottom half. */
            if (!q->tx_bh) {
                q->tx_bh = qemu_bh_new(virtio_net_tx_bh, q);
            }
        }

        q->tx_waiting = 0;
        q->n = n;
    }

    if (ctrl) {
        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    }

    virtio_net_set_queues(n);
}
1117

    
1118
/*
 * Serialize the virtio-net device state into the migration stream.
 *
 * @f:      stream to write to
 * @opaque: the VirtIONet being saved
 *
 * The field order written here is the stream format that
 * virtio_net_load() reads back, so it must not be rearranged.
 */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;
    int queue;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    /* Generic virtio state first, then the net-specific fields. */
    virtio_save(&n->vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);

    /* Receive filter modes. */
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);

    /* MAC filter table: entry count followed by the entries in use. */
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);

    /* VLAN filter bitmap, one bit per VLAN id. */
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);

    /* The multiqueue section is only present for multiqueue devices. */
    if (n->max_queues <= 1) {
        return;
    }

    qemu_put_be16(f, n->max_queues);
    qemu_put_be16(f, n->curr_queues);
    /* Queue 0's tx_waiting was already written above. */
    for (queue = 1; queue < n->curr_queues; queue++) {
        qemu_put_be32(f, n->vqs[queue].tx_waiting);
    }
}
1153

    
1154
/*
 * Restore the virtio-net device state from a migration stream.
 *
 * @f:          stream to read from
 * @opaque:     the VirtIONet being restored
 * @version_id: version of the saved section; versions 2 through
 *              VIRTIO_NET_VM_VERSION are accepted
 *
 * Returns 0 on success, -EINVAL for an unsupported version, the error
 * from virtio_load() if the generic state fails to load, or -1 when the
 * saved image is incompatible with this device or carries values that
 * are out of range.
 *
 * The stream is untrusted input: every count read from it is validated
 * before it is used as a length or an array bound.
 */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    int ret, i, link_down;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) {
        return -EINVAL;
    }

    ret = virtio_load(&n->vdev, f);
    if (ret) {
        return ret;
    }

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));

    if (version_id >= 3) {
        n->status = qemu_get_be16(f);
    }

    if (version_id >= 4) {
        /* These flags were stored as 32-bit values before version 8. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        /*
         * Keep the count unsigned while validating it: stored directly
         * into the signed mac_table.in_use, a value with the top bit set
         * would compare as <= MAC_TABLE_ENTRIES and turn into a huge read
         * length.
         */
        uint32_t in_use = qemu_get_be32(f);

        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (in_use <= MAC_TABLE_ENTRIES) {
            n->mac_table.in_use = in_use;
            qemu_get_buffer(f, n->mac_table.macs, in_use * ETH_ALEN);
        } else {
            /*
             * The saved table does not fit.  Consume and discard it one
             * entry at a time through a fixed-size scratch buffer, so no
             * length is ever computed from the untrusted count, then fall
             * back to the overflow flags.
             */
            uint8_t discard[ETH_ALEN];
            uint32_t left;

            for (left = in_use; left > 0; left--) {
                qemu_get_buffer(f, discard, ETH_ALEN);
            }
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6) {
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    }

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            tap_set_offload(qemu_get_queue(n->nic)->peer,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    if (n->max_queues > 1) {
        uint16_t curr_queues;

        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        /*
         * n->vqs holds max_queues entries; reject a larger queue count
         * so the loop below cannot write past the end of the array.
         */
        curr_queues = qemu_get_be16(f);
        if (curr_queues > n->max_queues) {
            error_report("virtio-net: curr_queues %x > max_queues %x",
                         curr_queues, n->max_queues);
            return -1;
        }
        n->curr_queues = curr_queues;

        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    return 0;
}
1269

    
1270
/*
 * NetClientInfo cleanup callback: drop the device's reference to its NIC
 * by clearing the nic pointer in the VirtIONet that owns @nc.
 */
static void virtio_net_cleanup(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);

    dev->nic = NULL;
}
1276

    
1277
static NetClientInfo net_virtio_info = {
1278
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
1279
    .size = sizeof(NICState),
1280
    .can_receive = virtio_net_can_receive,
1281
    .receive = virtio_net_receive,
1282
        .cleanup = virtio_net_cleanup,
1283
    .link_status_changed = virtio_net_set_link_status,
1284
};
1285

    
1286
/*
 * guest_notifier_pending callback: ask the vhost backend of the queue
 * pair that owns virtqueue @idx whether that virtqueue has a pending
 * notification.  Must only be called while vhost is started.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *dev = to_virtio_net(vdev);
    NetClientState *subqueue = qemu_get_subqueue(dev->nic, vq2q(idx));

    assert(dev->vhost_started);

    return vhost_net_virtqueue_pending(tap_get_vhost_net(subqueue->peer),
                                       idx);
}
1293

    
1294
/*
 * guest_notifier_mask callback: forward the mask/unmask request for
 * virtqueue @idx to the vhost backend of the queue pair that owns it.
 * Must only be called while vhost is started.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *dev = to_virtio_net(vdev);
    NetClientState *subqueue = qemu_get_subqueue(dev->nic, vq2q(idx));

    assert(dev->vhost_started);

    vhost_net_virtqueue_mask(tap_get_vhost_net(subqueue->peer),
                             vdev, idx, mask);
}
1303

    
1304
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
1305
                              virtio_net_conf *net, uint32_t host_features)
1306
{
1307
    VirtIONet *n;
1308
    int i, config_size = 0;
1309

    
1310
    for (i = 0; feature_sizes[i].flags != 0; i++) {
1311
        if (host_features & feature_sizes[i].flags) {
1312
            config_size = MAX(feature_sizes[i].end, config_size);
1313
        }
1314
    }
1315

    
1316
    n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
1317
                                        config_size, sizeof(VirtIONet));
1318

    
1319
    n->config_size = config_size;
1320
    n->vdev.get_config = virtio_net_get_config;
1321
    n->vdev.set_config = virtio_net_set_config;
1322
    n->vdev.get_features = virtio_net_get_features;
1323
    n->vdev.set_features = virtio_net_set_features;
1324
    n->vdev.bad_features = virtio_net_bad_features;
1325
    n->vdev.reset = virtio_net_reset;
1326
    n->vdev.set_status = virtio_net_set_status;
1327
    n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
1328
    n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
1329
    n->max_queues = MAX(conf->queues, 1);
1330
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
1331
    n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
1332
    n->curr_queues = 1;
1333
    n->vqs[0].n = n;
1334
    n->tx_timeout = net->txtimer;
1335

    
1336
    if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
1337
        error_report("virtio-net: "
1338
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1339
                     net->tx);
1340
        error_report("Defaulting to \"bh\"");
1341
    }
1342

    
1343
    if (net->tx && !strcmp(net->tx, "timer")) {
1344
        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
1345
                                           virtio_net_handle_tx_timer);
1346
        n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
1347
                                               &n->vqs[0]);
1348
    } else {
1349
        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
1350
                                           virtio_net_handle_tx_bh);
1351
        n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
1352
    }
1353
    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
1354
    qemu_macaddr_default_if_unset(&conf->macaddr);
1355
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
1356
    n->status = VIRTIO_NET_S_LINK_UP;
1357

    
1358
    n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);
1359
    peer_test_vnet_hdr(n);
1360
    if (peer_has_vnet_hdr(n)) {
1361
        for (i = 0; i < n->max_queues; i++) {
1362
            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
1363
        }
1364
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
1365
    } else {
1366
        n->host_hdr_len = 0;
1367
    }
1368

    
1369
    qemu_format_nic_info_str(qemu_get_queue(n->nic), conf->macaddr.a);
1370

    
1371
    n->vqs[0].tx_waiting = 0;
1372
    n->tx_burst = net->txburst;
1373
    virtio_net_set_mrg_rx_bufs(n, 0);
1374
    n->promisc = 1; /* for compatibility */
1375

    
1376
    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1377

    
1378
    n->vlans = g_malloc0(MAX_VLAN >> 3);
1379

    
1380
    n->qdev = dev;
1381
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
1382
                    virtio_net_save, virtio_net_load, n);
1383

    
1384
    add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");
1385

    
1386
    return &n->vdev;
1387
}
1388

    
1389
/*
 * Tear down a virtio-net device created by virtio_net_init().
 *
 * Stops the backend, unregisters the savevm handlers, frees the filter
 * tables, releases the per-queue TX timer or bottom half of every queue
 * pair, and finally deletes the NIC and the virtio device.
 */
void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
    int i;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(n->qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    for (i = 0; i < n->max_queues; i++) {
        VirtIONetQueue *q = &n->vqs[i];
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        qemu_purge_queued_packets(nc);

        if (q->tx_timer) {
            qemu_del_timer(q->tx_timer);
            qemu_free_timer(q->tx_timer);
        } else if (q->tx_bh) {
            /*
             * Queue pairs other than 0 are only set up once multiqueue
             * is enabled; until then their zero-allocated entries have
             * neither a timer nor a bottom half, so there is nothing to
             * delete and NULL must not be handed to qemu_bh_delete().
             */
            qemu_bh_delete(q->tx_bh);
        }
    }

    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_cleanup(&n->vdev);
}