hw/virtio-net.c @ 1e89ad5b

/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/iov.h"
#include "virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "virtio-net.h"
#include "vhost_net.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
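
/* The VLAN filter below is a bitmap of MAX_VLAN bits, i.e.
 * MAX_VLAN >> 3 == 512 bytes in all; VLAN id N maps to bit (N & 0x1f)
 * of the 32-bit word vlans[N >> 5]. */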

typedef struct VirtIONetQueue {
    VirtQueue *rx_vq;
    VirtQueue *tx_vq;
    QEMUTimer *tx_timer;
    QEMUBH *tx_bh;
    int tx_waiting;
    struct {
        VirtQueueElement elem;
        ssize_t len;
    } async_tx;
    struct VirtIONet *n;
} VirtIONetQueue;
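
/* async_tx holds the element of a packet that was handed to
 * qemu_sendv_packet_async() but not yet accepted by the backend;
 * virtio_net_tx_complete() pushes it onto the used ring and resumes
 * flushing once the send completes. */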

typedef struct VirtIONet
{
    VirtIODevice vdev;
    uint8_t mac[ETH_ALEN];
    uint16_t status;
    VirtIONetQueue vqs[MAX_QUEUE_NUM];
    VirtQueue *ctrl_vq;
    NICState *nic;
    uint32_t tx_timeout;
    int32_t tx_burst;
    uint32_t has_vnet_hdr;
    size_t host_hdr_len;
    size_t guest_hdr_len;
    uint8_t has_ufo;
    int mergeable_rx_bufs;
    uint8_t promisc;
    uint8_t allmulti;
    uint8_t alluni;
    uint8_t nomulti;
    uint8_t nouni;
    uint8_t nobcast;
    uint8_t vhost_started;
    struct {
        int in_use;
        int first_multi;
        uint8_t multi_overflow;
        uint8_t uni_overflow;
        uint8_t *macs;
    } mac_table;
    uint32_t *vlans;
    DeviceState *qdev;
    int multiqueue;
    uint16_t max_queues;
    uint16_t curr_queues;
} VirtIONet;

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
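
/* Virtqueues are laid out pairwise as rx0, tx0, rx1, tx1, ..., with the
 * control vq (if any) after all pairs, so a virtqueue index maps to its
 * VirtIONetQueue by integer division:
 *   vq2q(0) == vq2q(1) == 0, vq2q(2) == vq2q(3) == 1, ...
 */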

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static VirtIONet *to_virtio_net(VirtIODevice *vdev)
{
    return (VirtIONet *)vdev;
}

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_config netcfg;

    stw_p(&netcfg.status, n->status);
    stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, sizeof(netcfg));
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_config netcfg;

    memcpy(&netcfg, config, sizeof(netcfg));

    if (!((n->vdev.guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR) & 1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}
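
/* When VIRTIO_NET_F_CTRL_MAC_ADDR has been negotiated the guest sets
 * its MAC through the control vq instead (VIRTIO_NET_CTRL_MAC_ADDR_SET
 * below), so MAC writes through config space are ignored above. */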

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!nc->peer) {
        return;
    }
    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return;
    }

    if (!tap_get_vhost_net(nc->peer)) {
        return;
    }

    if (!!n->vhost_started == virtio_net_started(n, status) &&
                              !nc->peer->link_down) {
        return;
    }
    if (!n->vhost_started) {
        int r;
        if (!vhost_net_query(tap_get_vhost_net(nc->peer), &n->vdev)) {
            return;
        }
        n->vhost_started = 1;
        r = vhost_net_start(&n->vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(&n->vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}
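
/* The vhost backend is meant to run exactly while virtio_net_started()
 * holds (guest driver OK, link up, VM running); on every status update
 * virtio_net_vhost_status() reconciles the backend with that state,
 * starting or stopping vhost as needed. */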

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = to_virtio_net(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (virtio_net_started(n, queue_status) && !n->vhost_started) {
            if (q->tx_timer) {
                qemu_mod_timer(q->tx_timer,
                               qemu_get_clock_ns(vm_clock) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                qemu_del_timer(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(&n->vdev);

    virtio_net_set_status(&n->vdev, n->vdev.status);
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    memset(n->vlans, 0, MAX_VLAN >> 3);
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return;
    }

    n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    n->guest_hdr_len = n->mergeable_rx_bufs ?
        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
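
/* For reference: struct virtio_net_hdr is 10 bytes, and
 * virtio_net_hdr_mrg_rxbuf appends a 16-bit num_buffers field, giving
 * 12 bytes when mergeable rx buffers are negotiated. When the tap peer
 * supports that header length, the host header is grown to match the
 * guest header so headers can be passed through unmodified. */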

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            assert(!peer_attach(n, i));
        } else {
            assert(!peer_detach(n, i));
        }
    }
}
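
/* Note: the peer_attach()/peer_detach() calls above live inside
 * assert(), so a build with NDEBUG would compile the calls away; a more
 * defensive sketch would capture the return value first and assert on
 * that. */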

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);

static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        features &= ~(0x1 << VIRTIO_NET_F_CSUM);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_ECN);

        features &= ~(0x1 << VIRTIO_NET_F_GUEST_CSUM);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO4);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO6);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_UFO);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
    }

    if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return features;
    }
    if (!tap_get_vhost_net(nc->peer)) {
        return features;
    }
    return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
}

static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint32_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    features |= (1 << VIRTIO_NET_F_MAC);
    features |= (1 << VIRTIO_NET_F_CSUM);
    features |= (1 << VIRTIO_NET_F_HOST_TSO4);
    features |= (1 << VIRTIO_NET_F_HOST_TSO6);
    features |= (1 << VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    int i;

    virtio_net_set_multiqueue(n, !!(features & (1 << VIRTIO_NET_F_MQ)),
                              !!(features & (1 << VIRTIO_NET_F_CTRL_VQ)));

    virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));

    if (n->has_vnet_hdr) {
        tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
            continue;
        }
        if (!tap_get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    struct virtio_net_ctrl_mac mac_data;
    size_t s;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.uni_overflow = 0;
    n->mac_table.multi_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = ldl_p(&mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        return VIRTIO_NET_ERR;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    n->mac_table.first_multi = n->mac_table.in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = ldl_p(&mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        return VIRTIO_NET_ERR;
    }

    if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
        /* Append the multicast entries after the unicast ones; copying
         * to offset 0 would clobber the unicast table. */
        s = iov_to_buf(iov, iov_cnt, 0,
                       &n->mac_table.macs[n->mac_table.in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.multi_overflow = 1;
    }

    return VIRTIO_NET_OK;
}
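
/* VIRTIO_NET_CTRL_MAC_TABLE_SET carries two tables back to back, each
 * a little-endian 32-bit entry count followed by that many 6-byte
 * MACs: first the unicast table, then the multicast one. A sketch of
 * the wire layout:
 *
 *   le32 uni_entries;   u8 uni_macs[uni_entries][ETH_ALEN];
 *   le32 multi_entries; u8 multi_macs[multi_entries][ETH_ALEN];
 */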

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    uint16_t vid;
    size_t s;

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = lduw_p(&vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                VirtQueueElement *elem)
{
    struct virtio_net_ctrl_mq s;

    if (elem->out_num != 2 ||
        elem->out_sg[1].iov_len != sizeof(struct virtio_net_ctrl_mq)) {
        error_report("virtio-net ctrl invalid steering command");
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    memcpy(&s, elem->out_sg[1].iov_base, sizeof(struct virtio_net_ctrl_mq));

    if (s.virtqueue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        s.virtqueue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        s.virtqueue_pairs > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = s.virtqueue_pairs;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(&n->vdev, n->vdev.status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement elem;
    size_t s;
    struct iovec *iov;
    unsigned int iov_cnt;

    while (virtqueue_pop(vq, &elem)) {
        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        iov = elem.out_sg;
        iov_cnt = elem.out_num;
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, &elem);
        }

        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}
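
/* Every control request is a chain of out descriptors (the
 * virtio_net_ctrl_hdr {class, cmd} plus the command payload) followed
 * by an in buffer into which the device writes the one-byte ack,
 * VIRTIO_NET_OK or VIRTIO_NET_ERR. */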

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!n->vdev.vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
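
/* The magic offsets above assume an untagged Ethernet frame carrying an
 * IPv4 header without options: the ethertype sits at bytes 12-13 of the
 * 14-byte Ethernet header, ip.protocol at 14 + 9 == 23, and the UDP
 * source port at 14 + 20 == 34. */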

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement elem;
        int len, total;
        const struct iovec *sg = elem.in_sg;

        total = 0;

        if (virtqueue_pop(q->rx_vq, &elem) == 0) {
            if (i == 0)
                return -1;
            error_report("virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                    i, n->mergeable_rx_bufs, offset, size,
                    n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem.in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem.in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd",
                         i, n->mergeable_rx_bufs,
                         offset, size, n->guest_hdr_len, n->host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, &elem, total, i++);
    }

    if (mhdr_cnt) {
        stw_p(&mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(&n->vdev, q->rx_vq);

    return size;
}
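
/* With mergeable rx buffers a large packet may span several descriptor
 * chains; iov_copy() records up front where num_buffers lives in the
 * first chain's header, and once every chain has been filled the final
 * chain count is written back through that saved iovec. */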

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
    virtio_notify(&n->vdev, q->tx_vq);

    q->async_tx.elem.out_num = q->async_tx.len = 0;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtQueueElement elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    assert(n->vdev.vm_running);

    if (q->async_tx.elem.out_num) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    while (virtqueue_pop(q->tx_vq, &elem)) {
        ssize_t ret, len;
        unsigned int out_num = elem.out_num;
        struct iovec *out_sg = &elem.out_sg[0];
        struct iovec sg[VIRTQUEUE_MAX_SIZE];

        if (out_num < 1) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        len = n->guest_hdr_len;

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            q->async_tx.len  = len;
            return -EBUSY;
        }

        len += ret;

        virtqueue_push(q->tx_vq, &elem, 0);
        virtio_notify(&n->vdev, q->tx_vq);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
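
/* qemu_sendv_packet_async() returning 0 means the backend queued the
 * packet: the element is parked in async_tx, tx notifications are
 * turned off, and virtio_net_tx_complete() completes it and restarts
 * the flush once the backend can accept more. */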

static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        qemu_del_timer(q->tx_timer);
        q->tx_waiting = 0;
        virtio_net_flush_tx(q);
    } else {
        qemu_mod_timer(q->tx_timer,
                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    assert(n->vdev.vm_running);

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    int32_t ret;

    assert(n->vdev.vm_running);

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
        return;

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    if (virtio_net_flush_tx(q) > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
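
/* Two tx mitigation strategies: with tx=timer, a guest kick arms a
 * tx_timeout-ns timer and packets are flushed when it fires (or on the
 * next kick); with tx=bh (the default), a bottom half flushes up to
 * tx_burst packets and reschedules itself while traffic keeps coming. */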

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
{
    VirtIODevice *vdev = &n->vdev;
    int i, max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;

    for (i = 2; i <= n->max_queues * 2 + 1; i++) {
        virtio_del_queue(vdev, i);
    }

    for (i = 1; i < max; i++) {
        n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
        if (n->vqs[i].tx_timer) {
            n->vqs[i].tx_vq =
                virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
            n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
                                                   virtio_net_tx_timer,
                                                   &n->vqs[i]);
        } else {
            n->vqs[i].tx_vq =
                virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
            n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
        }

        n->vqs[i].tx_waiting = 0;
        n->vqs[i].n = n;
    }

    if (ctrl) {
        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    }

    virtio_net_set_queues(n);
}

static void virtio_net_save(QEMUFile *f, void *opaque)
{
    int i;
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);
    virtio_save(&n->vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);
    if (n->max_queues > 1) {
        qemu_put_be16(f, n->max_queues);
        qemu_put_be16(f, n->curr_queues);
        for (i = 1; i < n->curr_queues; i++) {
            qemu_put_be32(f, n->vqs[i].tx_waiting);
        }
    }
}

static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    int ret, i, link_down;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
        return -EINVAL;

    ret = virtio_load(&n->vdev, f);
    if (ret) {
        return ret;
    }

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));

    if (version_id >= 3)
        n->status = qemu_get_be16(f);

    if (version_id >= 4) {
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        n->mac_table.in_use = qemu_get_be32(f);
        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
            qemu_get_buffer(f, n->mac_table.macs,
                            n->mac_table.in_use * ETH_ALEN);
        } else if (n->mac_table.in_use) {
            /* The scratch buffer must hold in_use * ETH_ALEN bytes,
             * matching the read below. */
            uint8_t *buf = g_malloc0(n->mac_table.in_use * ETH_ALEN);
            qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);
            g_free(buf);
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6)
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            tap_set_offload(qemu_get_queue(n->nic)->peer,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    return 0;
}

static void virtio_net_cleanup(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    n->nic = NULL;
}

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .cleanup = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = to_virtio_net(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = to_virtio_net(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                              virtio_net_conf *net,
                              uint32_t host_features)
{
    VirtIONet *n;
    int i;

    n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                        sizeof(struct virtio_net_config),
                                        sizeof(VirtIONet));

    n->vdev.get_config = virtio_net_get_config;
    n->vdev.set_config = virtio_net_set_config;
    n->vdev.get_features = virtio_net_get_features;
    n->vdev.set_features = virtio_net_set_features;
    n->vdev.bad_features = virtio_net_bad_features;
    n->vdev.reset = virtio_net_reset;
    n->vdev.set_status = virtio_net_set_status;
    n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
    n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
    n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
    n->max_queues = conf->queues;
    n->curr_queues = 1;
    n->vqs[0].n = n;
    n->tx_timeout = net->txtimer;

    if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     net->tx);
        error_report("Defaulting to \"bh\"");
    }

    if (net->tx && !strcmp(net->tx, "timer")) {
        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
                                           virtio_net_handle_tx_timer);
        n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
                                               &n->vqs[0]);
    } else {
        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
                                           virtio_net_handle_tx_bh);
        n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
    }
    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&conf->macaddr);
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, conf,
                          object_get_typename(OBJECT(dev)), dev->id, n);
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), conf->macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = net->txburst;
    virtio_net_set_mrg_rx_bufs(n, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);

    add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");

    return &n->vdev;
}

void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
    int i;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(n->qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    for (i = 0; i < n->max_queues; i++) {
        VirtIONetQueue *q = &n->vqs[i];
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        qemu_purge_queued_packets(nc);

        if (q->tx_timer) {
            qemu_del_timer(q->tx_timer);
            qemu_free_timer(q->tx_timer);
        } else {
            qemu_bh_delete(q->tx_bh);
        }
    }

    qemu_del_nic(n->nic);
    virtio_cleanup(&n->vdev);
}