Statistics
| Branch: | Revision:

root / net / tap.c @ c28b1c10

History | View | Annotate | Download (12.5 kB)

1
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
25

    
26
#include "net/tap.h"
27

    
28
#include "config-host.h"
29

    
30
#include <signal.h>
31
#include <sys/ioctl.h>
32
#include <sys/stat.h>
33
#include <sys/wait.h>
34
#include <net/if.h>
35

    
36
#include "net.h"
37
#include "sysemu.h"
38
#include "qemu-char.h"
39
#include "qemu-common.h"
40

    
41
#include "net/tap-linux.h"
42

    
43
/* Maximum GSO packet size (64k) plus plenty of room for
 * the ethernet and virtio_net headers.
 */
46
#define TAP_BUFSIZE ((64 * 1024) + (4 * 1024)) /* 64 KiB payload + 4 KiB of header room */
47

    
48
typedef struct TAPState {
49
    VLANClientState *vc;
50
    int fd;
51
    char down_script[1024];
52
    char down_script_arg[128];
53
    uint8_t buf[TAP_BUFSIZE];
54
    unsigned int read_poll : 1;
55
    unsigned int write_poll : 1;
56
    unsigned int has_vnet_hdr : 1;
57
    unsigned int using_vnet_hdr : 1;
58
    unsigned int has_ufo: 1;
59
} TAPState;
60

    
61
/* fd handler callbacks, referenced by tap_update_fd_handler() before their definitions */
static void tap_writable(void *opaque);
static void tap_send(void *opaque);
static int tap_can_send(void *opaque);

/* Script runner, referenced by tap_cleanup() before its definition */
static int launch_script(const char *setup_script, const char *ifname, int fd);
66

    
67
/*
 * Re-register the fd handlers for s->fd according to the current
 * read_poll / write_poll flags.  A cleared flag registers NULL for the
 * corresponding callback(s).
 */
static void tap_update_fd_handler(TAPState *s)
{
    int (*can_read)(void *) = NULL;
    void (*on_read)(void *) = NULL;
    void (*on_write)(void *) = NULL;

    if (s->read_poll) {
        can_read = tap_can_send;
        on_read = tap_send;
    }
    if (s->write_poll) {
        on_write = tap_writable;
    }

    qemu_set_fd_handler2(s->fd, can_read, on_read, on_write, s);
}
75

    
76
/* Turn read polling on (enable != 0) or off, then refresh the fd handlers. */
static void tap_read_poll(TAPState *s, int enable)
{
    s->read_poll = (enable != 0);

    tap_update_fd_handler(s);
}
81

    
82
/* Turn write polling on (enable != 0) or off, then refresh the fd handlers. */
static void tap_write_poll(TAPState *s, int enable)
{
    s->write_poll = (enable != 0);

    tap_update_fd_handler(s);
}
87

    
88
static void tap_writable(void *opaque)
89
{
90
    TAPState *s = opaque;
91

    
92
    tap_write_poll(s, 0);
93

    
94
    qemu_flush_queued_packets(s->vc);
95
}
96

    
97
/*
 * Write one packet, described by iov[0..iovcnt), to the tap fd.
 *
 * The writev() call is repeated while it fails with EINTR.  On EAGAIN
 * write polling is enabled and 0 is returned.  Otherwise the writev()
 * result is returned as is (-1 on any other error).
 */
static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
{
    ssize_t written;

    for (;;) {
        written = writev(s->fd, iov, iovcnt);
        if (written != -1 || errno != EINTR) {
            break;
        }
    }

    if (written != -1) {
        return written;
    }

    if (errno == EAGAIN) {
        tap_write_poll(s, 1);
        return 0;
    }

    return -1;
}
112

    
113
/*
 * Scatter/gather receive callback: write the packet in iov[] to the tap fd.
 *
 * When the tap has a vnet header that is not in use, a zeroed
 * struct virtio_net_hdr is written in front of the caller's vectors.
 */
static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov,
                               int iovcnt)
{
    TAPState *s = vc->opaque;
    struct virtio_net_hdr hdr = { 0, };
    struct iovec with_hdr[iovcnt + 1];

    if (!s->has_vnet_hdr || s->using_vnet_hdr) {
        return tap_write_packet(s, iov, iovcnt);
    }

    with_hdr[0].iov_base = &hdr;
    with_hdr[0].iov_len = sizeof(hdr);
    memcpy(with_hdr + 1, iov, iovcnt * sizeof(*iov));

    return tap_write_packet(s, with_hdr, iovcnt + 1);
}
131

    
132
/*
 * Raw receive callback: write buf[0..size) to the tap fd, preceded by a
 * zeroed struct virtio_net_hdr whenever the tap has a vnet header.
 */
static ssize_t tap_receive_raw(VLANClientState *vc, const uint8_t *buf, size_t size)
{
    TAPState *s = vc->opaque;
    struct virtio_net_hdr hdr = { 0, };
    struct iovec iov[2];
    int used = 0;

    if (s->has_vnet_hdr) {
        iov[used].iov_base = &hdr;
        iov[used].iov_len = sizeof(hdr);
        used++;
    }

    iov[used].iov_base = (char *)buf;
    iov[used].iov_len = size;
    used++;

    return tap_write_packet(s, iov, used);
}
151

    
152
/*
 * Receive callback: write buf[0..size) to the tap fd.  When the tap has
 * a vnet header that is not in use, the work is delegated to
 * tap_receive_raw().
 */
static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
{
    TAPState *s = vc->opaque;
    struct iovec payload;

    if (!s->has_vnet_hdr || s->using_vnet_hdr) {
        payload.iov_base = (char *)buf;
        payload.iov_len = size;
        return tap_write_packet(s, &payload, 1);
    }

    return tap_receive_raw(vc, buf, size);
}
166

    
167
static int tap_can_send(void *opaque)
168
{
169
    TAPState *s = opaque;
170

    
171
    return qemu_can_send_packet(s->vc);
172
}
173

    
174
#ifndef __sun__
/*
 * Read up to maxlen bytes from tapfd into buf with a single read() call
 * and return its result unchanged.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
180

    
181
/*
 * Completion callback handed to qemu_send_packet_async() by tap_send():
 * turns read polling back on.  The len argument is not used.
 */
static void tap_send_completed(VLANClientState *vc, ssize_t len)
{
    TAPState *tap = vc->opaque;

    tap_read_poll(tap, 1);
}
186

    
187
static void tap_send(void *opaque)
188
{
189
    TAPState *s = opaque;
190
    int size;
191

    
192
    do {
193
        uint8_t *buf = s->buf;
194

    
195
        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
196
        if (size <= 0) {
197
            break;
198
        }
199

    
200
        if (s->has_vnet_hdr && !s->using_vnet_hdr) {
201
            buf  += sizeof(struct virtio_net_hdr);
202
            size -= sizeof(struct virtio_net_hdr);
203
        }
204

    
205
        size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed);
206
        if (size == 0) {
207
            tap_read_poll(s, 0);
208
        }
209
    } while (size > 0);
210
}
211

    
212
/* sndbuf should be set to a value lower than the tx queue
 * capacity of any destination network interface.
 * Ethernet NICs generally have txqueuelen=1000, so 1MB is
 * a good default, given a 1500 byte MTU.
 */
217
#define TAP_DEFAULT_SNDBUF (1024 * 1024) /* parenthesized so it is safe inside larger expressions */
218

    
219
/*
 * Apply the "sndbuf" option (default TAP_DEFAULT_SNDBUF) to the tap fd
 * through the TUNSETSNDBUF ioctl.
 *
 * A value of 0 selects INT_MAX.  Values that do not fit in an int are
 * clamped to INT_MAX rather than truncated by the narrowing assignment.
 * NOTE(review): assumes qemu_opt_get_size() returns an unsigned size -- confirm.
 *
 * Returns 0 on success, and also when the ioctl fails but "sndbuf" was
 * not given explicitly.  Returns -1 after reporting the error when the
 * ioctl fails for an explicitly requested value.
 */
static int tap_set_sndbuf(TAPState *s, QemuOpts *opts)
{
    uint64_t requested;
    int sndbuf;

    requested = qemu_opt_get_size(opts, "sndbuf", TAP_DEFAULT_SNDBUF);
    if (requested == 0 || requested > INT_MAX) {
        sndbuf = INT_MAX;
    } else {
        sndbuf = (int)requested;
    }

    if (ioctl(s->fd, TUNSETSNDBUF, &sndbuf) == -1) {
        /* Capture errno before any other call can overwrite it. */
        int saved_errno = errno;

        if (qemu_opt_get(opts, "sndbuf")) {
            qemu_error("TUNSETSNDBUF ioctl failed: %s\n", strerror(saved_errno));
            return -1;
        }
    }
    return 0;
}
234

    
235
/* Return the has_ufo flag of a tap client; asserts that vc is of tap type. */
int tap_has_ufo(VLANClientState *vc)
{
    TAPState *tap;

    assert(vc->type == NET_CLIENT_TYPE_TAP);
    tap = vc->opaque;

    return tap->has_ufo;
}
243

    
244
/* Return the has_vnet_hdr flag of a tap client; asserts that vc is of tap type. */
int tap_has_vnet_hdr(VLANClientState *vc)
{
    TAPState *tap;

    assert(vc->type == NET_CLIENT_TYPE_TAP);
    tap = vc->opaque;

    return tap->has_vnet_hdr;
}
252

    
253
/*
 * Record whether the vnet header is in use.  The argument is normalised
 * to 0/1.  Asserts that vc is a tap client and that the normalised value
 * equals the tap's has_vnet_hdr flag.
 */
void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr)
{
    TAPState *s = vc->opaque;
    int in_use = !!using_vnet_hdr;

    assert(vc->type == NET_CLIENT_TYPE_TAP);
    assert(s->has_vnet_hdr == in_use);

    s->using_vnet_hdr = in_use;
}
264

    
265
static int tap_probe_vnet_hdr(int fd)
266
{
267
    struct ifreq ifr;
268

    
269
    if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
270
        qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno));
271
        return 0;
272
    }
273

    
274
    return ifr.ifr_flags & IFF_VNET_HDR;
275
}
276

    
277
/*
 * Program the offload features of the tap fd with TUNSETOFFLOAD.
 *
 * No feature is requested unless csum is set.  The ECN bit is requested
 * only together with tso4 or tso6.  If the ioctl fails it is retried
 * once with TUN_F_UFO cleared; a second failure is reported on stderr.
 */
void tap_set_offload(VLANClientState *vc, int csum, int tso4,
                     int tso6, int ecn, int ufo)
{
    TAPState *s = vc->opaque;
    unsigned int features = 0;

    if (csum) {
        features |= TUN_F_CSUM;
        if (tso4) {
            features |= TUN_F_TSO4;
        }
        if (tso6) {
            features |= TUN_F_TSO6;
        }
        if (ecn && (tso4 || tso6)) {
            features |= TUN_F_TSO_ECN;
        }
        if (ufo) {
            features |= TUN_F_UFO;
        }
    }

    if (ioctl(s->fd, TUNSETOFFLOAD, features) == 0) {
        return;
    }

    /* Second attempt without UFO. */
    features &= ~TUN_F_UFO;
    if (ioctl(s->fd, TUNSETOFFLOAD, features) == 0) {
        return;
    }

    fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
            strerror(errno));
}
303

    
304
/*
 * Cleanup callback of the tap client: purge queued packets, run the down
 * script if one was recorded, unregister the fd handlers, close the fd
 * and free the state.
 */
static void tap_cleanup(VLANClientState *vc)
{
    TAPState *tap = vc->opaque;

    qemu_purge_queued_packets(vc);

    /* The down script runs while the fd is still open. */
    if (tap->down_script[0] != '\0') {
        launch_script(tap->down_script, tap->down_script_arg, tap->fd);
    }

    tap_read_poll(tap, 0);
    tap_write_poll(tap, 0);

    close(tap->fd);
    qemu_free(tap);
}
318

    
319
/* fd support */
320

    
321
/*
 * Create the tap state and its VLAN client for an already opened tap fd.
 *
 * Probes UFO support by requesting TUN_F_CSUM | TUN_F_UFO, then resets
 * all offloads through tap_set_offload() and enables read polling.
 * Returns the new state.
 */
static TAPState *net_tap_fd_init(VLANState *vlan,
                                 const char *model,
                                 const char *name,
                                 int fd,
                                 int vnet_hdr)
{
    TAPState *s = qemu_mallocz(sizeof(*s));
    unsigned int ufo_probe = TUN_F_CSUM | TUN_F_UFO;

    s->fd = fd;
    s->has_vnet_hdr = !!vnet_hdr;
    s->using_vnet_hdr = 0;
    s->vc = qemu_new_vlan_client(NET_CLIENT_TYPE_TAP,
                                 vlan, NULL, model, name, NULL,
                                 tap_receive, tap_receive_raw,
                                 tap_receive_iov, tap_cleanup, s);

    /* Check if tap supports UFO */
    s->has_ufo = (ioctl(s->fd, TUNSETOFFLOAD, ufo_probe) == 0);

    tap_set_offload(s->vc, 0, 0, 0, 0, 0);
    tap_read_poll(s, 1);

    return s;
}
347

    
348
static int launch_script(const char *setup_script, const char *ifname, int fd)
349
{
350
    sigset_t oldmask, mask;
351
    int pid, status;
352
    char *args[3];
353
    char **parg;
354

    
355
    sigemptyset(&mask);
356
    sigaddset(&mask, SIGCHLD);
357
    sigprocmask(SIG_BLOCK, &mask, &oldmask);
358

    
359
    /* try to launch network script */
360
    pid = fork();
361
    if (pid == 0) {
362
        int open_max = sysconf(_SC_OPEN_MAX), i;
363

    
364
        for (i = 0; i < open_max; i++) {
365
            if (i != STDIN_FILENO &&
366
                i != STDOUT_FILENO &&
367
                i != STDERR_FILENO &&
368
                i != fd) {
369
                close(i);
370
            }
371
        }
372
        parg = args;
373
        *parg++ = (char *)setup_script;
374
        *parg++ = (char *)ifname;
375
        *parg++ = NULL;
376
        execv(setup_script, args);
377
        _exit(1);
378
    } else if (pid > 0) {
379
        while (waitpid(pid, &status, 0) != pid) {
380
            /* loop */
381
        }
382
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
383

    
384
        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
385
            return 0;
386
        }
387
    }
388
    fprintf(stderr, "%s: could not launch network script\n", setup_script);
389
    return -1;
390
}
391

    
392
/*
 * Open a tap device as described by opts and run its setup script.
 *
 * *vnet_hdr receives the "vnet_hdr" option (default 1).  It is passed to
 * tap_open() as required only when the option was given explicitly.
 * The setup script is skipped when the "script" option is unset, empty
 * or "no".  On success the interface name is stored back into the
 * "ifname" option.
 *
 * Returns the tap fd, or -1 when tap_open() or the script fails.
 */
static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
{
    char ifname[128] = {0,};
    const char *wanted_ifname = qemu_opt_get(opts, "ifname");
    const char *setup_script;
    int vnet_hdr_required;
    int fd;

    if (wanted_ifname) {
        pstrcpy(ifname, sizeof(ifname), wanted_ifname);
    }

    *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
    vnet_hdr_required = qemu_opt_get(opts, "vnet_hdr") ? *vnet_hdr : 0;

    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
    if (fd < 0) {
        return -1;
    }

    setup_script = qemu_opt_get(opts, "script");
    if (setup_script &&
        setup_script[0] != '\0' &&
        strcmp(setup_script, "no") != 0) {
        if (launch_script(setup_script, ifname, fd)) {
            close(fd);
            return -1;
        }
    }

    qemu_opt_set(opts, "ifname", ifname);

    return fd;
}
427

    
428
/*
 * Create a tap network client from opts.
 *
 * With "fd=" an existing descriptor is used, and ifname=, script=,
 * downscript= and vnet_hdr= are rejected.  Otherwise a tap device is
 * opened through net_tap_init(), with the default scripts filled in when
 * none were given.
 *
 * Returns 0 on success and -1 on failure.
 */
int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
{
    TAPState *s;
    int fd, vnet_hdr;

    if (qemu_opt_get(opts, "fd")) {
        if (qemu_opt_get(opts, "ifname") ||
            qemu_opt_get(opts, "script") ||
            qemu_opt_get(opts, "downscript") ||
            qemu_opt_get(opts, "vnet_hdr")) {
            qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
            return -1;
        }

        fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
        if (fd == -1) {
            return -1;
        }

        fcntl(fd, F_SETFL, O_NONBLOCK);

        vnet_hdr = tap_probe_vnet_hdr(fd);
    } else {
        if (!qemu_opt_get(opts, "script")) {
            qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
        }

        if (!qemu_opt_get(opts, "downscript")) {
            qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
        }

        fd = net_tap_init(opts, &vnet_hdr);
        /* net_tap_init() returns -1 on failure; do not build a client on it. */
        if (fd == -1) {
            return -1;
        }
    }

    s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
    if (!s) {
        close(fd);
        return -1;
    }

    /* NOTE(review): on failure here the client, state and fd created above
     * are not released -- confirm whether the caller cleans them up. */
    if (tap_set_sndbuf(s, opts) < 0) {
        return -1;
    }

    if (qemu_opt_get(opts, "fd")) {
        snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd);
    } else {
        const char *ifname, *script, *downscript;

        ifname     = qemu_opt_get(opts, "ifname");
        script     = qemu_opt_get(opts, "script");
        downscript = qemu_opt_get(opts, "downscript");

        snprintf(s->vc->info_str, sizeof(s->vc->info_str),
                 "ifname=%s,script=%s,downscript=%s",
                 ifname, script, downscript);

        /* "no" disables the down script; otherwise keep it for tap_cleanup(). */
        if (strcmp(downscript, "no") != 0) {
            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
            snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
        }
    }

    if (vlan) {
        vlan->nb_host_devs++;
    }

    return 0;
}