/*
 * netmap access for qemu
 *
 * Copyright (c) 2012-2013 Luigi Rizzo
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <sys/ioctl.h>
#include <net/if.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
/* NETMAP_WITH_LIBS enables the inline ring helpers (nm_ring_empty(),
 * nm_ring_space(), nm_ring_next(), ...) provided by netmap_user.h. */
#define NETMAP_WITH_LIBS
#include <net/netmap.h>
#include <net/netmap_user.h>

#include "net/net.h"
#include "net/tap.h"
#include "clients.h"
#include "sysemu/sysemu.h"
#include "qemu/error-report.h"
#include "qemu/iov.h"

/* Private netmap device info. */
typedef struct NetmapPriv {
    int                 fd;
    size_t              memsize;
    void                *mem;
    struct netmap_if    *nifp;
    struct netmap_ring  *rx;
    struct netmap_ring  *tx;
    char                fdname[PATH_MAX];        /* Normally "/dev/netmap". */
    char                ifname[IFNAMSIZ];
} NetmapPriv;

typedef struct NetmapState {
    NetClientState      nc;
    NetmapPriv          me;
    bool                read_poll;
    bool                write_poll;
    struct iovec        iov[IOV_MAX];  /* Scratch space used by netmap_send()
                                          to reassemble NS_MOREFRAG chains. */
    int                 vnet_hdr_len;  /* Current virtio-net header length. */
} NetmapState;

#ifndef __FreeBSD__
#define pkt_copy bcopy
#else
/* A fast copy routine only for multiples of 64 bytes, non-overlapping.
 * When 'l' is not a multiple of 64 the loop rounds up, which is safe
 * here because netmap buffers are sized in multiples of 64 bytes. */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
    const uint64_t *src = _src;
    uint64_t *dst = _dst;
    if (unlikely(l >= 1024)) {
        bcopy(src, dst, l);
        return;
    }
    for (; l > 0; l -= 64) {
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
        *dst++ = *src++;
    }
}
#endif /* __FreeBSD__ */

/*
 * Open a netmap device. We assume there is only one queue
 * (which is the case for the VALE bridge).
 */
static int netmap_open(NetmapPriv *me)
{
    int fd;
    int err;
    size_t l;
    struct nmreq req;

    me->fd = fd = open(me->fdname, O_RDWR);
    if (fd < 0) {
        error_report("Unable to open netmap device '%s' (%s)",
                        me->fdname, strerror(errno));
        return -1;
    }
    memset(&req, 0, sizeof(req));
    pstrcpy(req.nr_name, sizeof(req.nr_name), me->ifname);
    /* NETMAP_NO_TX_POLL: don't flush the TX ring on poll(); transmission
     * is triggered explicitly with NIOCTXSYNC instead. */
    req.nr_ringid = NETMAP_NO_TX_POLL;
    req.nr_version = NETMAP_API;
    err = ioctl(fd, NIOCREGIF, &req);
    if (err) {
        error_report("Unable to register %s: %s", me->ifname, strerror(errno));
        goto error;
    }
    l = me->memsize = req.nr_memsize;

    me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
    if (me->mem == MAP_FAILED) {
        error_report("Unable to mmap netmap shared memory: %s",
                        strerror(errno));
        me->mem = NULL;
        goto error;
    }

    me->nifp = NETMAP_IF(me->mem, req.nr_offset);
    me->tx = NETMAP_TXRING(me->nifp, 0);
    me->rx = NETMAP_RXRING(me->nifp, 0);
    return 0;

error:
    close(me->fd);
    return -1;
}

/* Tell the event-loop if the netmap backend can send packets
   to the frontend. */
static int netmap_can_send(void *opaque)
{
    NetmapState *s = opaque;

    return qemu_can_send_packet(&s->nc);
}

static void netmap_send(void *opaque);
static void netmap_writable(void *opaque);

/* Set the event-loop handlers for the netmap backend. */
static void netmap_update_fd_handler(NetmapState *s)
{
    qemu_set_fd_handler2(s->me.fd,
                         s->read_poll  ? netmap_can_send : NULL,
                         s->read_poll  ? netmap_send     : NULL,
                         s->write_poll ? netmap_writable : NULL,
                         s);
}

/* Update the read handler. */
static void netmap_read_poll(NetmapState *s, bool enable)
{
    if (s->read_poll != enable) { /* Do nothing if not changed. */
        s->read_poll = enable;
        netmap_update_fd_handler(s);
    }
}

/* Update the write handler. */
static void netmap_write_poll(NetmapState *s, bool enable)
{
    if (s->write_poll != enable) {
        s->write_poll = enable;
        netmap_update_fd_handler(s);
    }
}

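/* The NetClientInfo.poll callback: enable or disable polling in both
 * directions at once. */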
static void netmap_poll(NetClientState *nc, bool enable)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    if (s->read_poll != enable || s->write_poll != enable) {
        s->read_poll = enable;
        s->write_poll = enable;
        netmap_update_fd_handler(s);
    }
}

/*
 * The fd_write() callback, invoked if the fd is marked as
 * writable after a poll. Unregister the handler and flush any
 * buffered packets.
 */
static void netmap_writable(void *opaque)
{
    NetmapState *s = opaque;

    netmap_write_poll(s, false);
    qemu_flush_queued_packets(&s->nc);
}

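/* Flush a packet coming from the frontend (guest --> backend) into a
 * single slot of the netmap TX ring. */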
static ssize_t netmap_receive(NetClientState *nc,
      const uint8_t *buf, size_t size)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
    struct netmap_ring *ring = s->me.tx;
    uint32_t i;
    uint32_t idx;
    uint8_t *dst;

    if (unlikely(!ring)) {
        /* Drop. */
        return size;
    }

    if (unlikely(size > ring->nr_buf_size)) {
        RD(5, "[netmap_receive] drop packet of size %d > %d\n",
                                    (int)size, (int)ring->nr_buf_size);
        return size;
    }

    if (nm_ring_empty(ring)) {
        /* No available slots in the netmap TX ring. */
        netmap_write_poll(s, true);
        return 0;
    }

    i = ring->cur;
    idx = ring->slot[i].buf_idx;
    dst = (uint8_t *)NETMAP_BUF(ring, idx);

    ring->slot[i].len = size;
    ring->slot[i].flags = 0;
    pkt_copy(buf, dst, size);
    ring->cur = ring->head = nm_ring_next(ring, i);
    ioctl(s->me.fd, NIOCTXSYNC, NULL);

    return size;
}

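/* Scatter-gather variant of netmap_receive(): each iovec fragment may be
 * split across multiple netmap slots, chained with the NS_MOREFRAG flag. */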
static ssize_t netmap_receive_iov(NetClientState *nc,
                    const struct iovec *iov, int iovcnt)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
    struct netmap_ring *ring = s->me.tx;
    uint32_t last;
    uint32_t idx;
    uint8_t *dst;
    int j;
    uint32_t i;

    if (unlikely(!ring)) {
        /* Drop the packet. */
        return iov_size(iov, iovcnt);
    }

    last = i = ring->cur;

    if (nm_ring_space(ring) < iovcnt) {
        /* Not enough netmap slots. */
        netmap_write_poll(s, true);
        return 0;
    }

    for (j = 0; j < iovcnt; j++) {
        int iov_frag_size = iov[j].iov_len;
        int offset = 0;
        int nm_frag_size;

        /* Split each iovec fragment across multiple netmap slots, if
           necessary. */
        while (iov_frag_size) {
            nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);

            if (unlikely(nm_ring_empty(ring))) {
                /* We ran out of netmap slots while splitting the
                   iovec fragments. */
                netmap_write_poll(s, true);
                return 0;
            }

            idx = ring->slot[i].buf_idx;
            dst = (uint8_t *)NETMAP_BUF(ring, idx);

            ring->slot[i].len = nm_frag_size;
            ring->slot[i].flags = NS_MOREFRAG;
            pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size);

            last = i;
            i = nm_ring_next(ring, i);

            offset += nm_frag_size;
            iov_frag_size -= nm_frag_size;
        }
    }
    /* The last slot must not have NS_MOREFRAG set. */
    ring->slot[last].flags &= ~NS_MOREFRAG;

    /* Now update ring->cur and ring->head. */
    ring->cur = ring->head = i;

    ioctl(s->me.fd, NIOCTXSYNC, NULL);

    return iov_size(iov, iovcnt);
}

/* Complete a previous send (backend --> guest) and enable the
   fd_read callback. */
static void netmap_send_completed(NetClientState *nc, ssize_t len)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    netmap_read_poll(s, true);
}

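/* The fd_read() callback: forward packets from the netmap RX ring to the
 * frontend, reassembling NS_MOREFRAG chains into a single iovec. */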
static void netmap_send(void *opaque)
{
    NetmapState *s = opaque;
    struct netmap_ring *ring = s->me.rx;

    /* Keep sending while there are available packets in the netmap
       RX ring and the forwarding path towards the peer is open. */
    while (!nm_ring_empty(ring) && qemu_can_send_packet(&s->nc)) {
        uint32_t i;
        uint32_t idx;
        bool morefrag;
        int iovcnt = 0;
        int iovsize;

        do {
            i = ring->cur;
            idx = ring->slot[i].buf_idx;
            morefrag = (ring->slot[i].flags & NS_MOREFRAG);
            s->iov[iovcnt].iov_base = (u_char *)NETMAP_BUF(ring, idx);
            s->iov[iovcnt].iov_len = ring->slot[i].len;
            iovcnt++;

            ring->cur = ring->head = nm_ring_next(ring, i);
        } while (!nm_ring_empty(ring) && morefrag);

        if (unlikely(nm_ring_empty(ring) && morefrag)) {
            RD(5, "[netmap_send] ran out of slots, with a pending "
                   "incomplete packet\n");
        }

        iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt,
                                            netmap_send_completed);

        if (iovsize == 0) {
            /* The peer cannot receive any more packets for now. This one
             * has been queued, so stop reading from the backend until
             * netmap_send_completed() is called.
             */
            netmap_read_poll(s, false);
            break;
        }
    }
}

/* Flush and close. */
static void netmap_cleanup(NetClientState *nc)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    qemu_purge_queued_packets(nc);

    netmap_poll(nc, false);
    munmap(s->me.mem, s->me.memsize);
    close(s->me.fd);

    s->me.fd = -1;
}

/* Offloading manipulation support callbacks. */
static bool netmap_has_ufo(NetClientState *nc)
{
    return true;
}

static bool netmap_has_vnet_hdr(NetClientState *nc)
{
    return true;
}

static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len)
{
    return len == 0 || len == sizeof(struct virtio_net_hdr) ||
                len == sizeof(struct virtio_net_hdr_mrg_rxbuf);
}

static void netmap_using_vnet_hdr(NetClientState *nc, bool enable)
{
}

static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
    int err;
    struct nmreq req;

    /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
     * length for the netmap adapter associated with 's->me.ifname'.
     */
    memset(&req, 0, sizeof(req));
    pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname);
    req.nr_version = NETMAP_API;
    req.nr_cmd = NETMAP_BDG_VNET_HDR;
    req.nr_arg1 = len;
    err = ioctl(s->me.fd, NIOCREGIF, &req);
    if (err) {
        error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s",
                     s->me.ifname, strerror(errno));
    } else {
        /* Keep track of the current length. */
        s->vnet_hdr_len = len;
    }
}

static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
                               int ecn, int ufo)
{
    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);

    /* Setting a virtio-net header length greater than zero automatically
     * enables the offloads.
     */
    if (!s->vnet_hdr_len) {
        netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
    }
}

/* NetClientInfo methods */
static NetClientInfo net_netmap_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NETMAP,
    .size = sizeof(NetmapState),
    .receive = netmap_receive,
    .receive_iov = netmap_receive_iov,
    .poll = netmap_poll,
    .cleanup = netmap_cleanup,
    .has_ufo = netmap_has_ufo,
    .has_vnet_hdr = netmap_has_vnet_hdr,
    .has_vnet_hdr_len = netmap_has_vnet_hdr_len,
    .using_vnet_hdr = netmap_using_vnet_hdr,
    .set_offload = netmap_set_offload,
    .set_vnet_hdr_len = netmap_set_vnet_hdr_len,
};

/* The exported init function
 *
 * ... -net netmap,ifname="..."
 */
int net_init_netmap(const NetClientOptions *opts,
        const char *name, NetClientState *peer)
{
    const NetdevNetmapOptions *netmap_opts = opts->netmap;
    NetClientState *nc;
    NetmapPriv me;
    NetmapState *s;

    /* Fall back to the default device name if none was supplied. */
    pstrcpy(me.fdname, sizeof(me.fdname),
        netmap_opts->has_devname ? netmap_opts->devname : "/dev/netmap");
    pstrcpy(me.ifname, sizeof(me.ifname), netmap_opts->ifname);
    if (netmap_open(&me)) {
        return -1;
    }
    /* Create the object. */
    nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name);
    s = DO_UPCAST(NetmapState, nc, nc);
    s->me = me;
    s->vnet_hdr_len = 0;
    netmap_read_poll(s, true); /* Initially only poll for reads. */

    return 0;
}
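
/*
 * Example invocation (a sketch: "vale0:0" is an illustrative VALE port
 * name; any existing netmap-capable interface name works as ifname):
 *
 *   qemu-system-x86_64 ... -net nic -net netmap,ifname=vale0:0
 */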