Statistics
| Branch: | Revision:

root / net / tap.c @ a5fd2c34

History | View | Annotate | Download (13.4 kB)

1
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
25

    
26
#include "net/tap.h"
27

    
28
#include "config-host.h"
29

    
30
#include <signal.h>
31
#include <sys/ioctl.h>
32
#include <sys/stat.h>
33
#include <sys/wait.h>
34
#include <sys/socket.h>
35
#include <net/if.h>
36

    
37
#include "net.h"
38
#include "sysemu.h"
39
#include "qemu-char.h"
40
#include "qemu-common.h"
41
#include "qemu-error.h"
42

    
43
#include "net/tap-linux.h"
44

    
45
#include "hw/vhost_net.h"
46

    
47
/*
 * Size of the per-client receive buffer: the maximum GSO packet size (64k)
 * plus plenty of room for the ethernet and virtio_net headers.
 */
#define TAP_BUFSIZE (65536 + 4096)
51

    
52
typedef struct TAPState {
53
    VLANClientState nc;
54
    int fd;
55
    char down_script[1024];
56
    char down_script_arg[128];
57
    uint8_t buf[TAP_BUFSIZE];
58
    unsigned int read_poll : 1;
59
    unsigned int write_poll : 1;
60
    unsigned int using_vnet_hdr : 1;
61
    unsigned int has_ufo: 1;
62
    VHostNetState *vhost_net;
63
    unsigned host_vnet_hdr_len;
64
} TAPState;
65

    
66
/* fd handler callbacks, defined further down in this file. */
static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);

/* Runs an up/down script; defined further down in this file. */
static int launch_script(const char *setup_script, const char *ifname, int fd);
71

    
72
/*
 * (Re)register the fd handlers for s->fd according to the current
 * read_poll / write_poll flags.  A disabled direction gets NULL handlers.
 */
static void tap_update_fd_handler(TAPState *s)
{
    if (s->read_poll) {
        qemu_set_fd_handler2(s->fd,
                             tap_can_send,
                             tap_send,
                             s->write_poll ? tap_writable : NULL,
                             s);
    } else {
        qemu_set_fd_handler2(s->fd,
                             NULL,
                             NULL,
                             s->write_poll ? tap_writable : NULL,
                             s);
    }
}
80

    
81
/*
 * Enable (non-zero) or disable (zero) read polling on the tap fd and
 * refresh the registered fd handlers.
 */
static void tap_read_poll(TAPState *s, int enable)
{
    s->read_poll = (enable != 0);
    tap_update_fd_handler(s);
}
86

    
87
/*
 * Enable (non-zero) or disable (zero) write polling on the tap fd and
 * refresh the registered fd handlers.
 */
static void tap_write_poll(TAPState *s, int enable)
{
    s->write_poll = (enable != 0);
    tap_update_fd_handler(s);
}
92

    
93
static void tap_writable(void *opaque)
94
{
95
    TAPState *s = opaque;
96

    
97
    tap_write_poll(s, 0);
98

    
99
    qemu_flush_queued_packets(&s->nc);
100
}
101

    
102
/*
 * Write the given iovec to the tap fd.
 *
 * writev() is retried while it fails with EINTR.  If it fails with EAGAIN,
 * write polling is enabled and 0 is returned; otherwise the writev() result
 * (byte count, or -1 with errno set) is returned unchanged.
 */
static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
{
    ssize_t written;

    for (;;) {
        written = writev(s->fd, iov, iovcnt);
        if (written != -1 || errno != EINTR) {
            break;
        }
    }

    if (written != -1 || errno != EAGAIN) {
        return written;
    }

    /* The fd is not writable right now: wait for tap_writable(). */
    tap_write_poll(s, 1);
    return 0;
}
117

    
118
/*
 * Write a scatter/gather packet to the tap fd.
 *
 * When the host side has a vnet header but the header is not in use on this
 * client (using_vnet_hdr is clear), a zeroed header of host_vnet_hdr_len
 * bytes is prepended to the iovec before writing.
 */
static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
                               int iovcnt)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct iovec with_hdr[iovcnt + 1];
    struct virtio_net_hdr_mrg_rxbuf hdr = { };

    if (!s->host_vnet_hdr_len || s->using_vnet_hdr) {
        /* No header to synthesize: pass the caller's iovec straight through. */
        return tap_write_packet(s, iov, iovcnt);
    }

    with_hdr[0].iov_base = &hdr;
    with_hdr[0].iov_len = s->host_vnet_hdr_len;
    memcpy(&with_hdr[1], iov, iovcnt * sizeof(*iov));

    return tap_write_packet(s, with_hdr, iovcnt + 1);
}
136

    
137
/*
 * Write a raw packet to the tap fd, prepending a zeroed vnet header of
 * host_vnet_hdr_len bytes whenever the host side has one.
 */
static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct virtio_net_hdr_mrg_rxbuf hdr = { };
    struct iovec iov[2];
    struct iovec *next = iov;

    if (s->host_vnet_hdr_len) {
        next->iov_base = &hdr;
        next->iov_len = s->host_vnet_hdr_len;
        next++;
    }

    next->iov_base = (char *)buf;
    next->iov_len = size;
    next++;

    return tap_write_packet(s, iov, (int)(next - iov));
}
156

    
157
/*
 * Write a linear packet to the tap fd.
 *
 * If the host side has a vnet header that is not in use on this client, the
 * packet goes through tap_receive_raw(), which prepends a zeroed header;
 * otherwise the buffer is written as is.
 */
static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct iovec iov;

    if (!s->host_vnet_hdr_len || s->using_vnet_hdr) {
        iov.iov_base = (char *)buf;
        iov.iov_len = size;
        return tap_write_packet(s, &iov, 1);
    }

    return tap_receive_raw(nc, buf, size);
}
171

    
172
static int tap_can_send(void *opaque)
173
{
174
    TAPState *s = opaque;
175

    
176
    return qemu_can_send_packet(&s->nc);
177
}
178

    
179
#ifndef __sun__
/*
 * Read up to maxlen bytes from tapfd into buf.
 *
 * The read() is retried while it is interrupted by a signal (EINTR), which
 * matches how tap_write_packet() treats writev().
 *
 * Returns the number of bytes read, 0 on end of file, or -1 with errno set
 * (for example EAGAIN on a non-blocking descriptor with nothing to read).
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t len;

    do {
        len = read(tapfd, buf, maxlen);
    } while (len == -1 && errno == EINTR);

    return len;
}
#endif
185

    
186
/*
 * Completion callback passed to qemu_send_packet_async() by tap_send():
 * turns read polling on the tap fd back on.  The len argument is unused.
 */
static void tap_send_completed(VLANClientState *nc, ssize_t len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, 1);
}
191

    
192
static void tap_send(void *opaque)
193
{
194
    TAPState *s = opaque;
195
    int size;
196

    
197
    do {
198
        uint8_t *buf = s->buf;
199

    
200
        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
201
        if (size <= 0) {
202
            break;
203
        }
204

    
205
        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
206
            buf  += s->host_vnet_hdr_len;
207
            size -= s->host_vnet_hdr_len;
208
        }
209

    
210
        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
211
        if (size == 0) {
212
            tap_read_poll(s, 0);
213
        }
214
    } while (size > 0 && qemu_can_send_packet(&s->nc));
215
}
216

    
217
/*
 * Return the has_ufo flag recorded for this tap client (probed with
 * tap_probe_has_ufo() in net_tap_fd_init()).  nc must be a TAP client.
 */
int tap_has_ufo(VLANClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->has_ufo;
}
225

    
226
/*
 * Return 1 if the host side of this tap client has a vnet header
 * (host_vnet_hdr_len is non-zero), 0 otherwise.  nc must be a TAP client.
 */
int tap_has_vnet_hdr(VLANClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->host_vnet_hdr_len != 0;
}
234

    
235
/*
 * Return the result of tap_probe_vnet_hdr_len() for this client's tap fd
 * and the given header length.  nc must be a TAP client.
 */
int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap_probe_vnet_hdr_len(tap->fd, len);
}
243

    
244
/*
 * Set the vnet header length used on the host side of this tap client.
 *
 * len must equal sizeof(struct virtio_net_hdr) or
 * sizeof(struct virtio_net_hdr_mrg_rxbuf); both this and the client type
 * are enforced with assert().  The length is applied to the tap fd and
 * recorded in host_vnet_hdr_len.
 */
void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
           len == sizeof(struct virtio_net_hdr));

    tap = DO_UPCAST(TAPState, nc, nc);
    tap_fd_set_vnet_hdr_len(tap->fd, len);
    tap->host_vnet_hdr_len = len;
}
255

    
256
/*
 * Record whether the vnet header is in use on this tap client.
 *
 * The argument is normalized to 0/1.  It is asserted that nc is a TAP
 * client and that the requested state agrees with whether the host side has
 * a vnet header at all (host_vnet_hdr_len non-zero).
 */
void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int enabled = (using_vnet_hdr != 0);

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
    assert(!!tap->host_vnet_hdr_len == enabled);

    tap->using_vnet_hdr = enabled;
}
267

    
268
/*
 * Forward the requested offload settings to the tap fd.  Does nothing when
 * the client has no valid fd (fd < 0, as left behind by tap_cleanup()).
 */
void tap_set_offload(VLANClientState *nc, int csum, int tso4,
                     int tso6, int ecn, int ufo)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    if (tap->fd >= 0) {
        tap_fd_set_offload(tap->fd, csum, tso4, tso6, ecn, ufo);
    }
}
278

    
279
/*
 * Cleanup callback for a TAP client (see net_tap_info).
 *
 * In order: tears down vhost-net if present, purges queued packets, runs
 * the down script if one was configured, removes the fd handlers, and
 * closes the tap fd (leaving fd set to -1).
 */
static void tap_cleanup(VLANClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    if (tap->vhost_net != NULL) {
        vhost_net_cleanup(tap->vhost_net);
        tap->vhost_net = NULL;
    }

    qemu_purge_queued_packets(nc);

    /* An empty down_script means no script was configured. */
    if (tap->down_script[0] != '\0') {
        launch_script(tap->down_script, tap->down_script_arg, tap->fd);
    }

    tap_read_poll(tap, 0);
    tap_write_poll(tap, 0);
    close(tap->fd);
    tap->fd = -1;
}
298

    
299
/*
 * Poll callback for a TAP client (see net_tap_info): switches both read and
 * write polling on the tap fd on or off together.
 */
static void tap_poll(VLANClientState *nc, bool enable)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, enable);
    tap_write_poll(tap, enable);
}
305

    
306
/*
 * Return the tap file descriptor of this client.  nc must be a TAP client.
 */
int tap_get_fd(VLANClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->fd;
}
312

    
313
/* fd support */
314

    
315
static NetClientInfo net_tap_info = {
316
    .type = NET_CLIENT_TYPE_TAP,
317
    .size = sizeof(TAPState),
318
    .receive = tap_receive,
319
    .receive_raw = tap_receive_raw,
320
    .receive_iov = tap_receive_iov,
321
    .poll = tap_poll,
322
    .cleanup = tap_cleanup,
323
};
324

    
325
/*
 * Create a TAP net client around an already opened tap fd.
 *
 * vnet_hdr non-zero means the host side uses a vnet header; its length is
 * initialised to sizeof(struct virtio_net_hdr).  All offloads are switched
 * off and read polling is enabled before the new state is returned.
 */
static TAPState *net_tap_fd_init(VLANState *vlan,
                                 const char *model,
                                 const char *name,
                                 int fd,
                                 int vnet_hdr)
{
    VLANClientState *client;
    TAPState *tap;

    client = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
    tap = DO_UPCAST(TAPState, nc, client);

    tap->fd = fd;
    tap->vhost_net = NULL;
    tap->using_vnet_hdr = 0;
    if (vnet_hdr) {
        tap->host_vnet_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        tap->host_vnet_hdr_len = 0;
    }
    tap->has_ufo = tap_probe_has_ufo(tap->fd);

    tap_set_offload(&tap->nc, 0, 0, 0, 0, 0);
    tap_read_poll(tap, 1);

    return tap;
}
347

    
348
static int launch_script(const char *setup_script, const char *ifname, int fd)
349
{
350
    sigset_t oldmask, mask;
351
    int pid, status;
352
    char *args[3];
353
    char **parg;
354

    
355
    sigemptyset(&mask);
356
    sigaddset(&mask, SIGCHLD);
357
    sigprocmask(SIG_BLOCK, &mask, &oldmask);
358

    
359
    /* try to launch network script */
360
    pid = fork();
361
    if (pid == 0) {
362
        int open_max = sysconf(_SC_OPEN_MAX), i;
363

    
364
        for (i = 0; i < open_max; i++) {
365
            if (i != STDIN_FILENO &&
366
                i != STDOUT_FILENO &&
367
                i != STDERR_FILENO &&
368
                i != fd) {
369
                close(i);
370
            }
371
        }
372
        parg = args;
373
        *parg++ = (char *)setup_script;
374
        *parg++ = (char *)ifname;
375
        *parg = NULL;
376
        execv(setup_script, args);
377
        _exit(1);
378
    } else if (pid > 0) {
379
        while (waitpid(pid, &status, 0) != pid) {
380
            /* loop */
381
        }
382
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
383

    
384
        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
385
            return 0;
386
        }
387
    }
388
    fprintf(stderr, "%s: could not launch network script\n", setup_script);
389
    return -1;
390
}
391

    
392
/*
 * Open a tap device as described by opts and run its setup script.
 *
 * *vnet_hdr is initialised from the "vnet_hdr" option (default on) and is
 * passed by pointer to tap_open().  The header is treated as required only
 * when the option was given explicitly.  A "script" option that is unset,
 * empty or "no" means no script is run.  On success the interface name is
 * written back into opts as "ifname".
 *
 * Returns the tap fd, or -1 if opening the device or running the script
 * failed (the fd is closed in the latter case).
 */
static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
{
    char ifname[128] = {0,};
    const char *requested_ifname;
    const char *setup_script;
    int vnet_hdr_required = 0;
    int fd;

    requested_ifname = qemu_opt_get(opts, "ifname");
    if (requested_ifname) {
        pstrcpy(ifname, sizeof(ifname), requested_ifname);
    }

    *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
    if (qemu_opt_get(opts, "vnet_hdr")) {
        /* Explicitly requested by the user: failure to get it is an error. */
        vnet_hdr_required = *vnet_hdr;
    }

    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
    if (fd < 0) {
        return -1;
    }

    setup_script = qemu_opt_get(opts, "script");
    if (setup_script &&
        setup_script[0] != '\0' &&
        strcmp(setup_script, "no") != 0) {
        if (launch_script(setup_script, ifname, fd) != 0) {
            close(fd);
            return -1;
        }
    }

    qemu_opt_set(opts, "ifname", ifname);

    return fd;
}
427

    
428
/*
 * Create a TAP net client from the given options.
 *
 * Two modes are supported:
 *  - "fd=": use a descriptor resolved with net_handle_fd_param().  It is
 *    switched to non-blocking mode and probed for vnet header support.
 *    ifname=, script=, downscript= and vnet_hdr= are rejected in this mode.
 *  - otherwise: open a tap device through net_tap_init(), with the default
 *    up/down scripts filled in when none were given.
 *
 * After the client is created, the send buffer is configured, the info
 * string is filled in, the down script is remembered unless it is "no", and
 * vhost-net is initialised when requested with "vhost" / "vhostfd".
 *
 * Returns 0 on success, -1 on failure.
 */
int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
{
    TAPState *s;
    int fd, vnet_hdr = 0;

    if (qemu_opt_get(opts, "fd")) {
        int flags;

        if (qemu_opt_get(opts, "ifname") ||
            qemu_opt_get(opts, "script") ||
            qemu_opt_get(opts, "downscript") ||
            qemu_opt_get(opts, "vnet_hdr")) {
            error_report("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=");
            return -1;
        }

        fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
        if (fd == -1) {
            return -1;
        }

        /*
         * Add O_NONBLOCK to the existing file status flags instead of
         * replacing them, so flags already set on the passed-in descriptor
         * are kept.  This stays best-effort: if the flags cannot be read,
         * fall back to setting O_NONBLOCK alone.
         */
        flags = fcntl(fd, F_GETFL);
        if (flags == -1) {
            flags = 0;
        }
        fcntl(fd, F_SETFL, flags | O_NONBLOCK);

        vnet_hdr = tap_probe_vnet_hdr(fd);
    } else {
        if (!qemu_opt_get(opts, "script")) {
            qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
        }

        if (!qemu_opt_get(opts, "downscript")) {
            qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
        }

        fd = net_tap_init(opts, &vnet_hdr);
        if (fd == -1) {
            return -1;
        }
    }

    s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
    if (!s) {
        close(fd);
        return -1;
    }

    /*
     * NOTE(review): on this and the later failure paths the client created
     * above is not torn down here -- confirm whether the caller cleans up.
     */
    if (tap_set_sndbuf(s->fd, opts) < 0) {
        return -1;
    }

    if (qemu_opt_get(opts, "fd")) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
    } else {
        const char *ifname, *script, *downscript;

        ifname     = qemu_opt_get(opts, "ifname");
        script     = qemu_opt_get(opts, "script");
        downscript = qemu_opt_get(opts, "downscript");

        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                 "ifname=%s,script=%s,downscript=%s",
                 ifname, script, downscript);

        /* Remember the down script so tap_cleanup() can run it. */
        if (strcmp(downscript, "no") != 0) {
            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
            snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
        }
    }

    /* "vhost" defaults to on exactly when a "vhostfd" was supplied. */
    if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd"))) {
        int vhostfd, r;
        if (qemu_opt_get(opts, "vhostfd")) {
            r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
            if (r == -1) {
                return -1;
            }
            vhostfd = r;
        } else {
            vhostfd = -1;
        }
        s->vhost_net = vhost_net_init(&s->nc, vhostfd);
        if (!s->vhost_net) {
            error_report("vhost-net requested but could not be initialized");
            return -1;
        }
    } else if (qemu_opt_get(opts, "vhostfd")) {
        error_report("vhostfd= is not valid without vhost");
        return -1;
    }

    return 0;
}
517

    
518
/*
 * Return the vhost-net state attached to this tap client, or NULL if there
 * is none.  nc must be a TAP client.
 */
VHostNetState *tap_get_vhost_net(VLANClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->vhost_net;
}