
hw/virtio-net.c @ 0d09e41a


/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container'.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))
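
/*
 * Example: with the packed virtio_net_config layout (a 6-byte mac followed
 * by a 16-bit status and a 16-bit max_virtqueue_pairs),
 * endof(struct virtio_net_config, mac) is 6 and
 * endof(struct virtio_net_config, status) is 8.
 */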

typedef struct VirtIOFeature {
    uint32_t flags;
    size_t end;
} VirtIOFeature;

static VirtIOFeature feature_sizes[] = {
    {.flags = 1 << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1 << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1 << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {}
};
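
/*
 * virtio_net_init() below scans this table and sets config_size to the
 * largest .end whose feature bit is set in host_features, so the exposed
 * config space covers exactly the fields the host advertises.
 */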

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
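
/*
 * Virtqueues come in rx/tx pairs: index 2N is the RX queue of pair N and
 * index 2N + 1 its TX queue, with the control queue added last (see
 * virtio_net_set_multiqueue()), so vq2q() maps a virtqueue index to its
 * queue pair.
 */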

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static VirtIONet *to_virtio_net(VirtIODevice *vdev)
{
    return (VirtIONet *)vdev;
}

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_config netcfg;

    stw_p(&netcfg.status, n->status);
    stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!(n->vdev.guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR & 1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!nc->peer) {
        return;
    }
    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return;
    }

    if (!tap_get_vhost_net(nc->peer)) {
        return;
    }

    if (!!n->vhost_started == virtio_net_started(n, status) &&
                              !nc->peer->link_down) {
        return;
    }
    if (!n->vhost_started) {
        int r;
        if (!vhost_net_query(tap_get_vhost_net(nc->peer), &n->vdev)) {
            return;
        }
        n->vhost_started = 1;
        r = vhost_net_start(&n->vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(&n->vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = to_virtio_net(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (virtio_net_started(n, queue_status) && !n->vhost_started) {
            if (q->tx_timer) {
                qemu_mod_timer(q->tx_timer,
                               qemu_get_clock_ns(vm_clock) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                qemu_del_timer(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(&n->vdev);

    virtio_net_set_status(&n->vdev, n->vdev.status);
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    memset(n->vlans, 0, MAX_VLAN >> 3);
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return;
    }

    n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    n->guest_hdr_len = n->mergeable_rx_bufs ?
        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
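
/*
 * For reference: struct virtio_net_hdr is 10 bytes, and the mergeable-RX
 * variant appends a 16-bit num_buffers field for 12 bytes total, so
 * negotiating VIRTIO_NET_F_MRG_RXBUF grows guest_hdr_len from 10 to 12.
 */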

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            assert(!peer_attach(n, i));
        } else {
            assert(!peer_detach(n, i));
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);

static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        features &= ~(0x1 << VIRTIO_NET_F_CSUM);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_ECN);

        features &= ~(0x1 << VIRTIO_NET_F_GUEST_CSUM);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO4);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO6);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_UFO);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
    }

    if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return features;
    }
    if (!tap_get_vhost_net(nc->peer)) {
        return features;
    }
    return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
}

static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint32_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    features |= (1 << VIRTIO_NET_F_MAC);
    features |= (1 << VIRTIO_NET_F_CSUM);
    features |= (1 << VIRTIO_NET_F_HOST_TSO4);
    features |= (1 << VIRTIO_NET_F_HOST_TSO6);
    features |= (1 << VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    int i;

    virtio_net_set_multiqueue(n, !!(features & (1 << VIRTIO_NET_F_MQ)),
                              !!(features & (1 << VIRTIO_NET_F_CTRL_VQ)));

    virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));

    if (n->has_vnet_hdr) {
        tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
            continue;
        }
        if (!tap_get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    struct virtio_net_ctrl_mac mac_data;
    size_t s;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.uni_overflow = 0;
    n->mac_table.multi_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = ldl_p(&mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        return VIRTIO_NET_ERR;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    n->mac_table.first_multi = n->mac_table.in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = ldl_p(&mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        return VIRTIO_NET_ERR;
    }

    if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
492
        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
493
                       mac_data.entries * ETH_ALEN);
494
        if (s != mac_data.entries * ETH_ALEN) {
495
            return VIRTIO_NET_ERR;
496
        }
497
        n->mac_table.in_use += mac_data.entries;
498
    } else {
499
        n->mac_table.multi_overflow = 1;
500
    }
501

    
502
    return VIRTIO_NET_OK;
503
}
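
/*
 * The VLAN filter is a 4096-bit bitmap (MAX_VLAN >> 3 == 512 bytes):
 * vid >> 5 selects the 32-bit word in n->vlans and 1U << (vid & 0x1f)
 * the bit within it.
 */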
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    uint16_t vid;
    size_t s;

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = lduw_p(&vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = lduw_p(&mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(&n->vdev, n->vdev.status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
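
/*
 * Each control request carries a virtio_net_ctrl_hdr (class + cmd)
 * followed by command-specific data in the out sg list; the reply is a
 * single virtio_net_ctrl_ack byte written to the in sg list.
 */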
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement elem;
    size_t s;
    struct iovec *iov;
    unsigned int iov_cnt;

    while (virtqueue_pop(vq, &elem)) {
        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        iov = elem.out_sg;
        iov_cnt = elem.out_num;
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!n->vdev.vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
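
/*
 * Returns 1 to deliver the frame to the guest, 0 to drop it silently.
 * 'buf' still starts with the host vnet header, hence the host_hdr_len
 * skip below.
 */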
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
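
/*
 * With VIRTIO_NET_F_MRG_RXBUF a packet may span several guest buffers;
 * the loop below fills one VirtQueueElement at a time and patches the
 * mergeable header's num_buffers field afterwards (via mhdr_sg), once
 * the final buffer count is known.
 */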
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement elem;
        int len, total;
        const struct iovec *sg = elem.in_sg;

        total = 0;

        if (virtqueue_pop(q->rx_vq, &elem) == 0) {
            if (i == 0)
                return -1;
            error_report("virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                    i, n->mergeable_rx_bufs, offset, size,
                    n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem.in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem.in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd",
                         i, n->mergeable_rx_bufs,
                         offset, size, n->guest_hdr_len, n->host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, &elem, total, i++);
    }

    if (mhdr_cnt) {
        stw_p(&mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(&n->vdev, q->rx_vq);

    return size;
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
    virtio_notify(&n->vdev, q->tx_vq);

    q->async_tx.elem.out_num = q->async_tx.len = 0;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtQueueElement elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    assert(n->vdev.vm_running);

    if (q->async_tx.elem.out_num) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    while (virtqueue_pop(q->tx_vq, &elem)) {
        ssize_t ret, len;
        unsigned int out_num = elem.out_num;
        struct iovec *out_sg = &elem.out_sg[0];
        struct iovec sg[VIRTQUEUE_MAX_SIZE];

        if (out_num < 1) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        len = n->guest_hdr_len;

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            q->async_tx.len  = len;
            return -EBUSY;
        }

        len += ret;

        virtqueue_push(q->tx_vq, &elem, 0);
        virtio_notify(&n->vdev, q->tx_vq);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
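
/*
 * Two TX mitigation strategies, selected by the tx=timer|bh property in
 * virtio_net_init(): the timer variant delays flushing by tx_timeout ns
 * to batch packets, while the bottom-half variant flushes as soon as the
 * event loop runs again, rescheduling itself while full bursts keep
 * arriving.
 */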
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        qemu_del_timer(q->tx_timer);
        q->tx_waiting = 0;
        virtio_net_flush_tx(q);
    } else {
        qemu_mod_timer(q->tx_timer,
                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    assert(n->vdev.vm_running);

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    int32_t ret;

    assert(n->vdev.vm_running);

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
        return;

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    if (virtio_net_flush_tx(q) > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
{
    VirtIODevice *vdev = &n->vdev;
    int i, max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;

    for (i = 2; i <= n->max_queues * 2 + 1; i++) {
        virtio_del_queue(vdev, i);
    }

    for (i = 1; i < max; i++) {
        n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
        if (n->vqs[i].tx_timer) {
            n->vqs[i].tx_vq =
                virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
            n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
                                                   virtio_net_tx_timer,
                                                   &n->vqs[i]);
        } else {
            n->vqs[i].tx_vq =
                virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
            n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
        }

        n->vqs[i].tx_waiting = 0;
        n->vqs[i].n = n;
    }

    if (ctrl) {
        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    }

    virtio_net_set_queues(n);
}

static void virtio_net_save(QEMUFile *f, void *opaque)
{
    int i;
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);
    virtio_save(&n->vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);
    if (n->max_queues > 1) {
        qemu_put_be16(f, n->max_queues);
        qemu_put_be16(f, n->curr_queues);
        for (i = 1; i < n->curr_queues; i++) {
            qemu_put_be32(f, n->vqs[i].tx_waiting);
        }
    }
}

static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    int ret, i, link_down;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
        return -EINVAL;

    ret = virtio_load(&n->vdev, f);
    if (ret) {
        return ret;
    }

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));

    if (version_id >= 3)
        n->status = qemu_get_be16(f);

    if (version_id >= 4) {
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        n->mac_table.in_use = qemu_get_be32(f);
        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
            qemu_get_buffer(f, n->mac_table.macs,
                            n->mac_table.in_use * ETH_ALEN);
        } else if (n->mac_table.in_use) {
            /* Discard the oversized table; the scratch allocation must
             * match the qemu_get_buffer() length or the read overruns it. */
            uint8_t *buf = g_malloc0(n->mac_table.in_use * ETH_ALEN);
            qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);
            g_free(buf);
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6)
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            tap_set_offload(qemu_get_queue(n->nic)->peer,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    return 0;
}

static void virtio_net_cleanup(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    n->nic = NULL;
}

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .cleanup = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = to_virtio_net(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = to_virtio_net(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                              virtio_net_conf *net, uint32_t host_features)
{
    VirtIONet *n;
    int i, config_size = 0;

    for (i = 0; feature_sizes[i].flags != 0; i++) {
        if (host_features & feature_sizes[i].flags) {
            config_size = MAX(feature_sizes[i].end, config_size);
        }
    }

    n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                        config_size, sizeof(VirtIONet));

    n->config_size = config_size;
    n->vdev.get_config = virtio_net_get_config;
    n->vdev.set_config = virtio_net_set_config;
    n->vdev.get_features = virtio_net_get_features;
    n->vdev.set_features = virtio_net_set_features;
    n->vdev.bad_features = virtio_net_bad_features;
    n->vdev.reset = virtio_net_reset;
    n->vdev.set_status = virtio_net_set_status;
    n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
    n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
    n->max_queues = MAX(conf->queues, 1);
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
    n->curr_queues = 1;
    n->vqs[0].n = n;
    n->tx_timeout = net->txtimer;

    if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     net->tx);
        error_report("Defaulting to \"bh\"");
    }

    if (net->tx && !strcmp(net->tx, "timer")) {
        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
                                           virtio_net_handle_tx_timer);
        n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
                                               &n->vqs[0]);
    } else {
        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
                                           virtio_net_handle_tx_bh);
        n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
    }
    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&conf->macaddr);
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, conf,
                          object_get_typename(OBJECT(dev)), dev->id, n);
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), conf->macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = net->txburst;
    virtio_net_set_mrg_rx_bufs(n, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);

    add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");

    return &n->vdev;
}

void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
    int i;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(n->qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    for (i = 0; i < n->max_queues; i++) {
        VirtIONetQueue *q = &n->vqs[i];
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        qemu_purge_queued_packets(nc);

        if (q->tx_timer) {
            qemu_del_timer(q->tx_timer);
            qemu_free_timer(q->tx_timer);
        } else {
            qemu_bh_delete(q->tx_bh);
        }
    }

    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_cleanup(&n->vdev);
}