Statistics
| Branch: | Revision:

root / hw / virtio-net.c @ 5430a28f

History | View | Annotate | Download (31.2 kB)

1
/*
2
 * Virtio Network Device
3
 *
4
 * Copyright IBM, Corp. 2007
5
 *
6
 * Authors:
7
 *  Anthony Liguori   <aliguori@us.ibm.com>
8
 *
9
 * This work is licensed under the terms of the GNU GPL, version 2.  See
10
 * the COPYING file in the top-level directory.
11
 *
12
 */
13

    
14
#include "iov.h"
15
#include "virtio.h"
16
#include "net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19
#include "qemu-error.h"
20
#include "qemu-timer.h"
21
#include "virtio-net.h"
22
#include "vhost_net.h"
23

    
24
/* Highest savevm stream version understood by virtio_net_load(). */
#define VIRTIO_NET_VM_VERSION    11

/* Capacity, in addresses, of the MAC filter table (mac_table.macs). */
#define MAC_TABLE_ENTRIES    64
/* Number of VLAN IDs tracked by the VLAN filter bitmap. */
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
28

    
29
typedef struct VirtIONet
30
{
31
    VirtIODevice vdev;
32
    uint8_t mac[ETH_ALEN];
33
    uint16_t status;
34
    VirtQueue *rx_vq;
35
    VirtQueue *tx_vq;
36
    VirtQueue *ctrl_vq;
37
    NICState *nic;
38
    QEMUTimer *tx_timer;
39
    QEMUBH *tx_bh;
40
    uint32_t tx_timeout;
41
    int32_t tx_burst;
42
    int tx_waiting;
43
    uint32_t has_vnet_hdr;
44
    uint8_t has_ufo;
45
    struct {
46
        VirtQueueElement elem;
47
        ssize_t len;
48
    } async_tx;
49
    int mergeable_rx_bufs;
50
    uint8_t promisc;
51
    uint8_t allmulti;
52
    uint8_t alluni;
53
    uint8_t nomulti;
54
    uint8_t nouni;
55
    uint8_t nobcast;
56
    uint8_t vhost_started;
57
    struct {
58
        int in_use;
59
        int first_multi;
60
        uint8_t multi_overflow;
61
        uint8_t uni_overflow;
62
        uint8_t *macs;
63
    } mac_table;
64
    uint32_t *vlans;
65
    DeviceState *qdev;
66
} VirtIONet;
67

    
68
/* TODO
69
 * - we could suppress RX interrupt if we were so inclined.
70
 */
71

    
72
/* Convert a generic VirtIODevice pointer into the VirtIONet that embeds it. */
static VirtIONet *to_virtio_net(VirtIODevice *vdev)
{
    VirtIONet *net = (VirtIONet *)vdev;

    return net;
}
76

    
77
/*
 * Fill the device configuration space image.
 *
 * Builds a struct virtio_net_config from the current MAC address and
 * status word and copies it into 'config'.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    struct virtio_net_config cfg;
    VirtIONet *net = to_virtio_net(vdev);

    memcpy(cfg.mac, net->mac, ETH_ALEN);
    cfg.status = lduw_p(&net->status);

    memcpy(config, &cfg, sizeof(cfg));
}
86

    
87
/*
 * Apply a configuration space write.
 *
 * Only the MAC address is taken from 'config'; when it differs from the
 * current one it is stored and the NIC info string is regenerated.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    struct virtio_net_config cfg;
    VirtIONet *net = to_virtio_net(vdev);

    memcpy(&cfg, config, sizeof(cfg));

    if (memcmp(cfg.mac, net->mac, ETH_ALEN) == 0) {
        /* MAC unchanged, nothing to update */
        return;
    }

    memcpy(net->mac, cfg.mac, ETH_ALEN);
    qemu_format_nic_info_str(&net->nic->nc, net->mac);
}
99

    
100
/*
 * Tell whether the device should be processing traffic for the given
 * virtio status byte: the driver is ready, the link is up and the VM runs.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return n->vdev.vm_running != 0;
}
105

    
106
/*
 * Start or stop the vhost backend so that it follows the device state.
 *
 * Does nothing unless the peer is a tap client that has a vhost net
 * instance, or when vhost is already in the state implied by 'status'.
 * A failed start is reported and leaves vhost_started clear.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    int err;

    if (!n->nic->nc.peer) {
        return;
    }
    if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
        return;
    }
    if (!tap_get_vhost_net(n->nic->nc.peer)) {
        return;
    }

    /* Already in the requested state? */
    if (virtio_net_started(n, status) == !!n->vhost_started) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
        n->vhost_started = 0;
        return;
    }

    if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
        return;
    }

    err = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
    if (err < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -err);
        return;
    }
    n->vhost_started = 1;
}
138

    
139
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
140
{
141
    VirtIONet *n = to_virtio_net(vdev);
142

    
143
    virtio_net_vhost_status(n, status);
144

    
145
    if (!n->tx_waiting) {
146
        return;
147
    }
148

    
149
    if (virtio_net_started(n, status) && !n->vhost_started) {
150
        if (n->tx_timer) {
151
            qemu_mod_timer(n->tx_timer,
152
                           qemu_get_clock(vm_clock) + n->tx_timeout);
153
        } else {
154
            qemu_bh_schedule(n->tx_bh);
155
        }
156
    } else {
157
        if (n->tx_timer) {
158
            qemu_del_timer(n->tx_timer);
159
        } else {
160
            qemu_bh_cancel(n->tx_bh);
161
        }
162
    }
163
}
164

    
165
/*
 * Link state callback: mirror nc->link_down into the LINK_UP status bit,
 * raise a config interrupt if the bit changed, and re-evaluate the
 * running state of the device.
 */
static void virtio_net_set_link_status(VLANClientState *nc)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
    const uint16_t prev_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (prev_status != n->status) {
        virtio_notify_config(&n->vdev);
    }

    virtio_net_set_status(&n->vdev, n->vdev.status);
}
180

    
181
/*
 * Device reset: return the receive mode to promiscuous "compatibility"
 * mode and wipe the MAC and VLAN filter tables.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Compatibility mode: accept everything, no restrictions */
    n->promisc = 1;
    n->allmulti = n->alluni = 0;
    n->nomulti = n->nouni = n->nobcast = 0;

    /* Drop all MAC filter state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Drop all VLAN filter state (one bit per VLAN id) */
    memset(n->vlans, 0, MAX_VLAN >> 3);
}
201

    
202
/*
 * Query whether the peer supports the vnet header.
 *
 * Returns 0 without touching n->has_vnet_hdr when there is no peer or the
 * peer is not a tap client; otherwise caches and returns
 * tap_has_vnet_hdr().
 */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
        return 0;
    }

    n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
    return n->has_vnet_hdr;
}
214

    
215
/*
 * Query whether the peer supports UFO.  Requires vnet header support;
 * the tap_has_ufo() result is cached in n->has_ufo and returned.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = tap_has_ufo(n->nic->nc.peer);
        return n->has_ufo;
    }

    return 0;
}
224

    
225
/*
 * Compute the feature bits offered to the guest.
 *
 * MAC is always offered.  Checksum/TSO/ECN offloads are removed when the
 * peer has no vnet header support, UFO bits are removed when the peer
 * lacks UFO.  If the peer is a tap client with a vhost net instance the
 * result is additionally filtered through vhost_net_get_features().
 */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        /* No vnet header: no host-side offloads ... */
        features &= ~((0x1 << VIRTIO_NET_F_CSUM) |
                      (0x1 << VIRTIO_NET_F_HOST_TSO4) |
                      (0x1 << VIRTIO_NET_F_HOST_TSO6) |
                      (0x1 << VIRTIO_NET_F_HOST_ECN));
        /* ... and no guest-side offloads either */
        features &= ~((0x1 << VIRTIO_NET_F_GUEST_CSUM) |
                      (0x1 << VIRTIO_NET_F_GUEST_TSO4) |
                      (0x1 << VIRTIO_NET_F_GUEST_TSO6) |
                      (0x1 << VIRTIO_NET_F_GUEST_ECN));
    } else {
        tap_using_vnet_hdr(n->nic->nc.peer, 1);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~((0x1 << VIRTIO_NET_F_GUEST_UFO) |
                      (0x1 << VIRTIO_NET_F_HOST_UFO));
    }

    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP ||
        !tap_get_vhost_net(n->nic->nc.peer)) {
        return features;
    }

    return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer), features);
}
259

    
260
/*
 * Feature set returned through the bad_features hook.
 *
 * Linux kernel 2.6.25 understood MAC (as everyone must) plus the
 * checksum, TSO4, TSO6 and ECN host offloads listed here.
 */
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    return (1 << VIRTIO_NET_F_MAC) |
           (1 << VIRTIO_NET_F_CSUM) |
           (1 << VIRTIO_NET_F_HOST_TSO4) |
           (1 << VIRTIO_NET_F_HOST_TSO6) |
           (1 << VIRTIO_NET_F_HOST_ECN);
}
274

    
275
/*
 * Record the features acknowledged by the guest.
 *
 * Latches the mergeable-RX-buffer setting, programs the tap offloads when
 * a vnet header is in use, and forwards the feature set to vhost if the
 * peer is a tap client with a vhost net instance.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));

    if (n->has_vnet_hdr) {
        tap_set_offload(n->nic->nc.peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }

    if (!n->nic->nc.peer) {
        return;
    }
    if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
        return;
    }
    if (!tap_get_vhost_net(n->nic->nc.peer)) {
        return;
    }

    vhost_net_ack_features(tap_get_vhost_net(n->nic->nc.peer), features);
}
298

    
299
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
300
                                     VirtQueueElement *elem)
301
{
302
    uint8_t on;
303

    
304
    if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(on)) {
305
        error_report("virtio-net ctrl invalid rx mode command");
306
        exit(1);
307
    }
308

    
309
    on = ldub_p(elem->out_sg[1].iov_base);
310

    
311
    if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC)
312
        n->promisc = on;
313
    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI)
314
        n->allmulti = on;
315
    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI)
316
        n->alluni = on;
317
    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI)
318
        n->nomulti = on;
319
    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI)
320
        n->nouni = on;
321
    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST)
322
        n->nobcast = on;
323
    else
324
        return VIRTIO_NET_ERR;
325

    
326
    return VIRTIO_NET_OK;
327
}
328

    
329
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
330
                                 VirtQueueElement *elem)
331
{
332
    struct virtio_net_ctrl_mac mac_data;
333

    
334
    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem->out_num != 3 ||
335
        elem->out_sg[1].iov_len < sizeof(mac_data) ||
336
        elem->out_sg[2].iov_len < sizeof(mac_data))
337
        return VIRTIO_NET_ERR;
338

    
339
    n->mac_table.in_use = 0;
340
    n->mac_table.first_multi = 0;
341
    n->mac_table.uni_overflow = 0;
342
    n->mac_table.multi_overflow = 0;
343
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
344

    
345
    mac_data.entries = ldl_p(elem->out_sg[1].iov_base);
346

    
347
    if (sizeof(mac_data.entries) +
348
        (mac_data.entries * ETH_ALEN) > elem->out_sg[1].iov_len)
349
        return VIRTIO_NET_ERR;
350

    
351
    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
352
        memcpy(n->mac_table.macs, elem->out_sg[1].iov_base + sizeof(mac_data),
353
               mac_data.entries * ETH_ALEN);
354
        n->mac_table.in_use += mac_data.entries;
355
    } else {
356
        n->mac_table.uni_overflow = 1;
357
    }
358

    
359
    n->mac_table.first_multi = n->mac_table.in_use;
360

    
361
    mac_data.entries = ldl_p(elem->out_sg[2].iov_base);
362

    
363
    if (sizeof(mac_data.entries) +
364
        (mac_data.entries * ETH_ALEN) > elem->out_sg[2].iov_len)
365
        return VIRTIO_NET_ERR;
366

    
367
    if (mac_data.entries) {
368
        if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
369
            memcpy(n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
370
                   elem->out_sg[2].iov_base + sizeof(mac_data),
371
                   mac_data.entries * ETH_ALEN);
372
            n->mac_table.in_use += mac_data.entries;
373
        } else {
374
            n->mac_table.multi_overflow = 1;
375
        }
376
    }
377

    
378
    return VIRTIO_NET_OK;
379
}
380

    
381
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
382
                                        VirtQueueElement *elem)
383
{
384
    uint16_t vid;
385

    
386
    if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(vid)) {
387
        error_report("virtio-net ctrl invalid vlan command");
388
        return VIRTIO_NET_ERR;
389
    }
390

    
391
    vid = lduw_p(elem->out_sg[1].iov_base);
392

    
393
    if (vid >= MAX_VLAN)
394
        return VIRTIO_NET_ERR;
395

    
396
    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
397
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
398
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
399
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
400
    else
401
        return VIRTIO_NET_ERR;
402

    
403
    return VIRTIO_NET_OK;
404
}
405

    
406
/*
 * Control virtqueue handler.
 *
 * For every request popped from 'vq': validate that a control header is
 * present in the first out buffer and that the last in buffer can hold
 * the ack byte, dispatch on the command class, write the resulting status
 * into the last in buffer and complete the element.
 *
 * The status is reset to VIRTIO_NET_ERR for each element so that a
 * request with an unknown class is answered with an error instead of
 * inheriting the result of the previous request.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status;
    VirtQueueElement elem;

    while (virtqueue_pop(vq, &elem)) {
        /* Unknown classes must fail; do not carry status across requests */
        status = VIRTIO_NET_ERR;

        if ((elem.in_num < 1) || (elem.out_num < 1)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        if (elem.out_sg[0].iov_len < sizeof(ctrl) ||
            elem.in_sg[elem.in_num - 1].iov_len < sizeof(status)) {
            error_report("virtio-net ctrl header not in correct element");
            exit(1);
        }

        ctrl.class = ldub_p(elem.out_sg[0].iov_base);
        ctrl.cmd = ldub_p(elem.out_sg[0].iov_base + sizeof(ctrl.class));

        if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, &elem);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);
        }

        stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}
441

    
442
/* RX */
443

    
444
/*
 * RX virtqueue kick handler: the guest supplied receive buffers, so retry
 * any packets queued for this NIC and wake the I/O thread.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *net = to_virtio_net(vdev);

    qemu_flush_queued_packets(&net->nic->nc);

    /* RX buffers are available now: make the IO thread leave its select
     * so that the tap file descriptor gets polled again */
    qemu_notify_event();
}
454

    
455
/*
 * can_receive callback: returns 1 only while the VM is running, the RX
 * queue is set up and the driver has signalled DRIVER_OK; 0 otherwise.
 */
static int virtio_net_can_receive(VLANClientState *nc)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    if (!n->vdev.vm_running) {
        return 0;
    }
    if (!virtio_queue_ready(n->rx_vq)) {
        return 0;
    }
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }
    return 1;
}
468

    
469
/*
 * True when the RX queue cannot take a packet of 'bufsize' bytes: it is
 * empty, or mergeable buffers are in use and fewer than 'bufsize' bytes
 * are available.
 */
static int virtio_net_rx_queue_short(VirtIONet *n, int bufsize)
{
    if (virtio_queue_empty(n->rx_vq)) {
        return 1;
    }
    return n->mergeable_rx_bufs &&
           !virtqueue_avail_bytes(n->rx_vq, bufsize, 0);
}

/*
 * Check whether the guest has provided enough RX buffers for 'bufsize'
 * bytes.  Returns 1 (with guest notifications disabled) if so, and 0
 * (with notifications left enabled) if not.
 */
static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
{
    if (virtio_net_rx_queue_short(n, bufsize)) {
        virtio_queue_set_notification(n->rx_vq, 1);

        /* The guest may have added buffers between the check above and
         * the moment notification was enabled; look again to close that
         * race before giving up. */
        if (virtio_net_rx_queue_short(n, bufsize)) {
            return 0;
        }
    }

    virtio_queue_set_notification(n->rx_vq, 0);
    return 1;
}
489

    
490
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
491
 * it never finds out that the packets don't have valid checksums.  This
492
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
493
 * fix this with Xen but it hasn't appeared in an upstream release of
494
 * dhclient yet.
495
 *
496
 * To avoid breaking existing guests, we catch udp packets and add
497
 * checksums.  This is terrible but it's better than hacking the guest
498
 * kernels.
499
 *
500
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
501
 * we should provide a mechanism to disable it to avoid polluting the host
502
 * cache.
503
 */
504
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
505
                                        const uint8_t *buf, size_t size)
506
{
507
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
508
        (size > 27 && size < 1500) && /* normal sized MTU */
509
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
510
        (buf[23] == 17) && /* ip.protocol == UDP */
511
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
512
        /* FIXME this cast is evil */
513
        net_checksum_calculate((uint8_t *)buf, size);
514
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
515
    }
516
}
517

    
518
/*
 * Write the virtio-net header into the first guest buffer.
 *
 * The header is initialised to "no flags, no GSO"; when the peer supplies
 * a vnet header it is copied from the front of 'buf' instead (and the
 * dhclient checksum workaround is applied to the frame behind it).
 * iov[0] is then advanced past 'hdr_len' bytes, the size of the header
 * presented to the guest.
 *
 * Returns the number of bytes of 'buf' consumed by the host header.
 */
static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
                          const void *buf, size_t size, size_t hdr_len)
{
    struct virtio_net_hdr *guest_hdr =
        (struct virtio_net_hdr *)iov[0].iov_base;
    int consumed = 0;

    guest_hdr->flags = 0;
    guest_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (n->has_vnet_hdr) {
        memcpy(guest_hdr, buf, sizeof(*guest_hdr));
        consumed = sizeof(*guest_hdr);
        work_around_broken_dhclient(guest_hdr, buf + consumed,
                                    size - consumed);
    }

    /* The tapfd only ever hands us a struct virtio_net_hdr, yet the
     * header given to the guest may be larger; skip the guest's size. */
    iov[0].iov_len  -= hdr_len;
    iov[0].iov_base += hdr_len;

    return consumed;
}
541

    
542
/*
 * Decide whether an incoming frame passes the receive filters.
 *
 * 'buf' starts with the vnet header when n->has_vnet_hdr is set; the
 * Ethernet frame follows.  Returns 1 to accept the frame and 0 to drop it.
 *
 * The VLAN id is assembled from individual bytes instead of reading
 * through a uint16_t pointer, so no alignment of the frame is assumed.
 *
 * NOTE(review): 'size' is not consulted; the frame is assumed to contain
 * at least the bytes examined here - confirm against callers.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    const uint8_t *ptr = buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    if (n->has_vnet_hdr) {
        ptr += sizeof(struct virtio_net_hdr);
    }

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        /* Big-endian tag control field; the low 12 bits are the VLAN id */
        int vid = ((ptr[14] << 8) | ptr[15]) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
594

    
595
/*
 * Receive callback: copy one packet from the backend into guest RX
 * buffers.
 *
 * Returns -1 when the device cannot receive or no RX element could be
 * popped for the first buffer, 0 when the guest has not supplied enough
 * buffers, and 'size' when the packet was delivered, filtered out, or
 * dropped because it did not fit a non-mergeable buffer.
 *
 * The buffer count written into the mergeable header is first narrowed
 * to a 16-bit variable: applying lduw_p() directly to the size_t counter
 * reads its first two bytes, which are the high-order (zero) bytes on a
 * big-endian host.
 */
static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
    struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
    size_t guest_hdr_len, offset, i, host_hdr_len;

    if (!virtio_net_can_receive(&n->nic->nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    guest_hdr_len = n->mergeable_rx_bufs ?
        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);

    host_hdr_len = n->has_vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
    if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len)) {
        return 0;
    }

    /* Filtered packets are reported as consumed */
    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement elem;
        int len, total;
        struct iovec sg[VIRTQUEUE_MAX_SIZE];

        total = 0;

        if (virtqueue_pop(n->rx_vq, &elem) == 0) {
            if (i == 0) {
                return -1;
            }
            error_report("virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                    i, n->mergeable_rx_bufs, offset, size,
                    guest_hdr_len, host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != guest_hdr_len) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /* Work on a copy: receive_header() advances sg[0] past the header */
        memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);

        if (i == 0) {
            if (n->mergeable_rx_bufs) {
                mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
            }

            offset += receive_header(n, sg, elem.in_num,
                                     buf + offset, size - offset, guest_hdr_len);
            total += guest_hdr_len;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd",
                         i, n->mergeable_rx_bufs,
                         offset, size, guest_hdr_len, host_hdr_len);
#endif
            /* NOTE(review): the popped element is neither filled nor
             * returned to the queue on this path - confirm intended. */
            return size;
        }

        /* signal other side */
        virtqueue_fill(n->rx_vq, &elem, total, i++);
    }

    if (mhdr) {
        /* Narrow before converting so the low 16 bits are what is read */
        uint16_t num_buffers = i;

        mhdr->num_buffers = lduw_p(&num_buffers);
    }

    virtqueue_flush(n->rx_vq, i);
    virtio_notify(&n->vdev, n->rx_vq);

    return size;
}
689

    
690
static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
691

    
692
/*
 * Completion callback for an asynchronous send started by
 * virtio_net_flush_tx(): complete the parked element, clear the in-flight
 * marker, re-enable TX notifications and resume flushing.
 */
static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    /* Hand the parked element back to the guest */
    virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
    virtio_notify(&n->vdev, n->tx_vq);

    /* out_num == 0 marks "no send in flight" for virtio_net_flush_tx() */
    n->async_tx.elem.out_num = n->async_tx.len = 0;

    virtio_queue_set_notification(n->tx_vq, 1);
    virtio_net_flush_tx(n, n->tx_vq);
}
704

    
705
/* TX */
706
/*
 * Send queued guest packets to the backend.
 *
 * Returns the number of packets completed (at most n->tx_burst), 0 when
 * the driver is not ready or an asynchronous send is still in flight, or
 * -EBUSY when the backend could not take a packet; in that case the
 * element is parked in n->async_tx until virtio_net_tx_complete() runs.
 */
static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
    VirtQueueElement elem;
    int32_t sent = 0;

    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return sent;
    }

    assert(n->vdev.vm_running);

    /* A previous packet is still being sent asynchronously */
    if (n->async_tx.elem.out_num) {
        virtio_queue_set_notification(n->tx_vq, 0);
        return sent;
    }

    while (virtqueue_pop(vq, &elem)) {
        struct iovec *sg = &elem.out_sg[0];
        unsigned int sg_cnt = elem.out_num;
        unsigned guest_hdr_len;
        ssize_t skipped = 0;
        ssize_t rc;

        /* size of the header the guest put in front of the packet */
        if (n->mergeable_rx_bufs) {
            guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        } else {
            guest_hdr_len = sizeof(struct virtio_net_hdr);
        }

        if (sg_cnt < 1 || sg->iov_len != guest_hdr_len) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        if (!n->has_vnet_hdr) {
            /* backend takes no header at all: skip the first buffer */
            sg++;
            sg_cnt--;
            skipped += guest_hdr_len;
        } else if (n->mergeable_rx_bufs) {
            /* tapfd expects a struct virtio_net_hdr: trim the extra bytes */
            guest_hdr_len -= sizeof(struct virtio_net_hdr);
            sg->iov_len -= guest_hdr_len;
            skipped += guest_hdr_len;
        }

        rc = qemu_sendv_packet_async(&n->nic->nc, sg, sg_cnt,
                                     virtio_net_tx_complete);
        if (rc == 0) {
            /* Backend busy: park the element until the completion callback */
            virtio_queue_set_notification(n->tx_vq, 0);
            n->async_tx.elem = elem;
            n->async_tx.len  = skipped;
            return -EBUSY;
        }

        virtqueue_push(vq, &elem, skipped + rc);
        virtio_notify(&n->vdev, vq);

        sent++;
        if (sent >= n->tx_burst) {
            break;
        }
    }

    return sent;
}
769

    
770
/*
 * TX queue kick handler for timer mode.
 *
 * The first kick arms the timer and disables further notifications; a
 * kick arriving while the timer is pending cancels it and flushes
 * immediately.  While the VM is stopped the kick is only remembered.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        n->tx_waiting = 1;
        return;
    }

    if (!n->tx_waiting) {
        qemu_mod_timer(n->tx_timer,
                       qemu_get_clock(vm_clock) + n->tx_timeout);
        n->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
        return;
    }

    virtio_queue_set_notification(vq, 1);
    qemu_del_timer(n->tx_timer);
    n->tx_waiting = 0;
    virtio_net_flush_tx(n, vq);
}
792

    
793
/*
 * TX queue kick handler for bottom-half mode: mark a transmit as pending
 * and, if the VM is running, disable notifications and schedule the
 * bottom half.  Kicks arriving while one is already pending are ignored.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    if (unlikely(n->tx_waiting)) {
        return;
    }
    n->tx_waiting = 1;

    /* Device stopped but VCPU still running: only remember the kick. */
    if (n->vdev.vm_running) {
        virtio_queue_set_notification(vq, 0);
        qemu_bh_schedule(n->tx_bh);
    }
}
808

    
809
static void virtio_net_tx_timer(void *opaque)
810
{
811
    VirtIONet *n = opaque;
812
    assert(n->vdev.vm_running);
813

    
814
    n->tx_waiting = 0;
815

    
816
    /* Just in case the driver is not ready on more */
817
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
818
        return;
819

    
820
    virtio_queue_set_notification(n->tx_vq, 1);
821
    virtio_net_flush_tx(n, n->tx_vq);
822
}
823

    
824
static void virtio_net_tx_bh(void *opaque)
825
{
826
    VirtIONet *n = opaque;
827
    int32_t ret;
828

    
829
    assert(n->vdev.vm_running);
830

    
831
    n->tx_waiting = 0;
832

    
833
    /* Just in case the driver is not ready on more */
834
    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
835
        return;
836

    
837
    ret = virtio_net_flush_tx(n, n->tx_vq);
838
    if (ret == -EBUSY) {
839
        return; /* Notification re-enable handled by tx_complete */
840
    }
841

    
842
    /* If we flush a full burst of packets, assume there are
843
     * more coming and immediately reschedule */
844
    if (ret >= n->tx_burst) {
845
        qemu_bh_schedule(n->tx_bh);
846
        n->tx_waiting = 1;
847
        return;
848
    }
849

    
850
    /* If less than a full burst, re-enable notification and flush
851
     * anything that may have come in while we weren't looking.  If
852
     * we find something, assume the guest is still active and reschedule */
853
    virtio_queue_set_notification(n->tx_vq, 1);
854
    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
855
        virtio_queue_set_notification(n->tx_vq, 0);
856
        qemu_bh_schedule(n->tx_bh);
857
        n->tx_waiting = 1;
858
    }
859
}
860

    
861
/*
 * Serialise the device state.  The field order written here is the
 * stream format read back, version by version, in virtio_net_load().
 */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;

    /* The backend has to be stopped by now, or it could still be
     * writing to guest memory. */
    assert(!n->vhost_started);

    /* generic virtio state */
    virtio_save(&n->vdev, f);

    /* base fields */
    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);

    /* fields read for version >= 3 and >= 4 */
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);

    /* MAC filter table (version >= 5) */
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);

    /* VLAN bitmap (version >= 6) */
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    /* vnet header requirement (version >= 7) */
    qemu_put_be32(f, n->has_vnet_hdr);

    /* overflow flags (version >= 9) */
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);

    /* extended rx modes (version >= 10) */
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);

    /* UFO requirement (version >= 11) */
    qemu_put_byte(f, n->has_ufo);
}
888

    
889
/*
 * Restore the device state written by virtio_net_save().
 *
 * Accepts stream versions 2 .. VIRTIO_NET_VM_VERSION; fields introduced
 * in later versions are only read when 'version_id' is new enough.
 *
 * Returns 0 on success, -EINVAL for an unsupported version or a negative
 * MAC table entry count, and -1 when the saved image needs vnet header or
 * UFO support that the current peer lacks.
 */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    int i;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) {
        return -EINVAL;
    }

    virtio_load(&n->vdev, f);

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->tx_waiting = qemu_get_be32(f);
    n->mergeable_rx_bufs = qemu_get_be32(f);

    if (version_id >= 3) {
        n->status = qemu_get_be16(f);
    }

    if (version_id >= 4) {
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        n->mac_table.in_use = qemu_get_be32(f);
        /* in_use is a signed int: a stream value with the top bit set
         * would compare as "fits in the table" below and then be used as
         * an entry count.  Reject it instead of keeping it in the state. */
        if (n->mac_table.in_use < 0) {
            error_report("virtio-net: saved image has invalid MAC table size");
            n->mac_table.in_use = 0;
            return -EINVAL;
        }
        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
            qemu_get_buffer(f, n->mac_table.macs,
                            n->mac_table.in_use * ETH_ALEN);
        } else if (n->mac_table.in_use) {
            /* Table too large for us: skip it and fall back to overflow */
            qemu_fseek(f, n->mac_table.in_use * ETH_ALEN, SEEK_CUR);
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6) {
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    }

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            tap_using_vnet_hdr(n->nic->nc.peer, 1);
            tap_set_offload(n->nic->nc.peer,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;
    return 0;
}
977

    
978
/*
 * Net client cleanup callback (installed as .cleanup in net_virtio_info).
 *
 * Recovers the NICState that embeds @nc, takes the VirtIONet stored in its
 * opaque pointer, and clears that device's nic reference.
 */
static void virtio_net_cleanup(VLANClientState *nc)
{
    NICState *nic = DO_UPCAST(NICState, nc, nc);
    VirtIONet *n = nic->opaque;

    n->nic = NULL;
}
984

    
985
static NetClientInfo net_virtio_info = {
986
    .type = NET_CLIENT_TYPE_NIC,
987
    .size = sizeof(NICState),
988
    .can_receive = virtio_net_can_receive,
989
    .receive = virtio_net_receive,
990
        .cleanup = virtio_net_cleanup,
991
    .link_status_changed = virtio_net_set_link_status,
992
};
993

    
994
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
995
                              virtio_net_conf *net)
996
{
997
    VirtIONet *n;
998

    
999
    n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
1000
                                        sizeof(struct virtio_net_config),
1001
                                        sizeof(VirtIONet));
1002

    
1003
    n->vdev.get_config = virtio_net_get_config;
1004
    n->vdev.set_config = virtio_net_set_config;
1005
    n->vdev.get_features = virtio_net_get_features;
1006
    n->vdev.set_features = virtio_net_set_features;
1007
    n->vdev.bad_features = virtio_net_bad_features;
1008
    n->vdev.reset = virtio_net_reset;
1009
    n->vdev.set_status = virtio_net_set_status;
1010
    n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
1011

    
1012
    if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
1013
        error_report("virtio-net: "
1014
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1015
                     net->tx);
1016
        error_report("Defaulting to \"bh\"");
1017
    }
1018

    
1019
    if (net->tx && !strcmp(net->tx, "timer")) {
1020
        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
1021
        n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
1022
        n->tx_timeout = net->txtimer;
1023
    } else {
1024
        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
1025
        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
1026
    }
1027
    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
1028
    qemu_macaddr_default_if_unset(&conf->macaddr);
1029
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
1030
    n->status = VIRTIO_NET_S_LINK_UP;
1031

    
1032
    n->nic = qemu_new_nic(&net_virtio_info, conf, dev->info->name, dev->id, n);
1033

    
1034
    qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
1035

    
1036
    n->tx_waiting = 0;
1037
    n->tx_burst = net->txburst;
1038
    n->mergeable_rx_bufs = 0;
1039
    n->promisc = 1; /* for compatibility */
1040

    
1041
    n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
1042

    
1043
    n->vlans = qemu_mallocz(MAX_VLAN >> 3);
1044

    
1045
    n->qdev = dev;
1046
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
1047
                    virtio_net_save, virtio_net_load, n);
1048

    
1049
    add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");
1050

    
1051
    return &n->vdev;
1052
}
1053

    
1054
/*
 * Tear down a virtio-net device.
 *
 * Sets the device status to 0, purges queued packets, unregisters the savevm
 * handlers, frees the MAC/VLAN filter tables, releases whichever tx
 * mitigation object (timer or bottom half) is in use, then cleans up the
 * virtio device and removes the net client.
 */
void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    qemu_purge_queued_packets(&n->nic->nc);

    unregister_savevm(n->qdev, "virtio-net", n);

    /* Filter tables allocated in virtio_net_init(). */
    qemu_free(n->mac_table.macs);
    qemu_free(n->vlans);

    /* A device has either a tx bottom half or a tx timer. */
    if (!n->tx_timer) {
        qemu_bh_delete(n->tx_bh);
    } else {
        qemu_del_timer(n->tx_timer);
        qemu_free_timer(n->tx_timer);
    }

    /*
     * NOTE(review): the net client is removed after virtio_cleanup().
     * net_virtio_info registers virtio_net_cleanup(), which writes n->nic;
     * confirm that virtio_cleanup() leaves n valid, or swap these two calls.
     */
    virtio_cleanup(&n->vdev);
    qemu_del_vlan_client(&n->nic->nc);
}