Statistics
| Branch: | Revision:

root / net / tap.c @ 54cdaa1b

History | View | Annotate | Download (13.3 kB)

1
/*
2
 * QEMU System Emulator
3
 *
4
 * Copyright (c) 2003-2008 Fabrice Bellard
5
 * Copyright (c) 2009 Red Hat, Inc.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a copy
8
 * of this software and associated documentation files (the "Software"), to deal
9
 * in the Software without restriction, including without limitation the rights
10
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
 * copies of the Software, and to permit persons to whom the Software is
12
 * furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in
15
 * all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
 * THE SOFTWARE.
24
 */
25

    
26
#include "net/tap.h"
27

    
28
#include "config-host.h"
29

    
30
#include <signal.h>
31
#include <sys/ioctl.h>
32
#include <sys/stat.h>
33
#include <sys/wait.h>
34
#include <sys/socket.h>
35
#include <net/if.h>
36

    
37
#include "net.h"
38
#include "sysemu.h"
39
#include "qemu-char.h"
40
#include "qemu-common.h"
41
#include "qemu-error.h"
42

    
43
#include "net/tap-linux.h"
44

    
45
#include "hw/vhost_net.h"
46

    
47
/* Maximum GSO packet size (64k) plus plenty of room for
48
 * the ethernet and virtio_net headers
49
 */
50
#define TAP_BUFSIZE (4096 + 65536)
51

    
52
typedef struct TAPState {
53
    VLANClientState nc;
54
    int fd;
55
    char down_script[1024];
56
    char down_script_arg[128];
57
    uint8_t buf[TAP_BUFSIZE];
58
    unsigned int read_poll : 1;
59
    unsigned int write_poll : 1;
60
    unsigned int using_vnet_hdr : 1;
61
    unsigned int has_ufo: 1;
62
    VHostNetState *vhost_net;
63
    unsigned host_vnet_hdr_len;
64
} TAPState;
65

    
66
static int launch_script(const char *setup_script, const char *ifname, int fd);
67

    
68
static int tap_can_send(void *opaque);
69
static void tap_send(void *opaque);
70
static void tap_writable(void *opaque);
71

    
72
/*
 * Re-register the fd handlers for @s so they match the current
 * read_poll / write_poll flags.  A NULL handler is passed for every
 * direction that is currently switched off.
 */
static void tap_update_fd_handler(TAPState *s)
{
    int (*can_read)(void *) = NULL;
    void (*on_read)(void *) = NULL;
    void (*on_write)(void *) = NULL;

    if (s->read_poll) {
        can_read = tap_can_send;
        on_read = tap_send;
    }
    if (s->write_poll) {
        on_write = tap_writable;
    }

    qemu_set_fd_handler2(s->fd, can_read, on_read, on_write, s);
}
80

    
81
/*
 * Switch read polling of the tap fd on (@enable != 0) or off and
 * refresh the registered fd handlers accordingly.
 */
static void tap_read_poll(TAPState *s, int enable)
{
    s->read_poll = (enable != 0);
    tap_update_fd_handler(s);
}
86

    
87
/*
 * Switch write polling of the tap fd on (@enable != 0) or off and
 * refresh the registered fd handlers accordingly.
 */
static void tap_write_poll(TAPState *s, int enable)
{
    s->write_poll = (enable != 0);
    tap_update_fd_handler(s);
}
92

    
93
static void tap_writable(void *opaque)
94
{
95
    TAPState *s = opaque;
96

    
97
    tap_write_poll(s, 0);
98

    
99
    qemu_flush_queued_packets(&s->nc);
100
}
101

    
102
/*
 * Write one packet, described by @iov / @iovcnt, to the tap fd.
 *
 * writev() is retried for as long as it fails with EINTR.  If it fails with
 * EAGAIN, write polling is enabled and 0 is returned.  Otherwise the
 * writev() result is returned unchanged (-1 on any other error).
 */
static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
{
    ssize_t written;

    for (;;) {
        written = writev(s->fd, iov, iovcnt);
        if (written != -1) {
            return written;
        }
        if (errno != EINTR) {
            break;
        }
    }

    /* writev() failed with something other than EINTR */
    if (errno == EAGAIN) {
        tap_write_poll(s, 1);
        return 0;
    }

    return -1;
}
117

    
118
/*
 * receive_iov callback: write a scatter/gather packet to the tap fd.
 *
 * When the fd carries a vnet header (host_vnet_hdr_len != 0) but
 * using_vnet_hdr is not set, a zero-filled header of host_vnet_hdr_len
 * bytes is placed in front of the caller's vector.  Otherwise the vector
 * is written as is.
 *
 * Returns the result of tap_write_packet().
 */
static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov,
                               int iovcnt)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    struct iovec iov_copy[iovcnt + 1];
    struct virtio_net_hdr_mrg_rxbuf hdr = { };

    if (!tap->host_vnet_hdr_len || tap->using_vnet_hdr) {
        return tap_write_packet(tap, iov, iovcnt);
    }

    /* slot 0 holds the blank header, the caller's entries follow */
    iov_copy[0].iov_base = &hdr;
    iov_copy[0].iov_len = tap->host_vnet_hdr_len;
    memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));

    return tap_write_packet(tap, iov_copy, iovcnt + 1);
}
136

    
137
/*
 * receive_raw callback: write the @size bytes at @buf to the tap fd.
 *
 * Whenever the fd carries a vnet header (host_vnet_hdr_len != 0), a
 * zero-filled header of that length is written in front of the data.
 *
 * Returns the result of tap_write_packet().
 */
static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    struct virtio_net_hdr_mrg_rxbuf hdr = { };
    struct iovec iov[2];
    struct iovec *next = iov;

    if (tap->host_vnet_hdr_len) {
        next->iov_base = &hdr;
        next->iov_len = tap->host_vnet_hdr_len;
        next++;
    }

    next->iov_base = (char *)buf;
    next->iov_len = size;
    next++;

    return tap_write_packet(tap, iov, (int)(next - iov));
}
156

    
157
/*
 * receive callback: write the @size bytes at @buf to the tap fd.
 *
 * If the fd carries a vnet header that is not in use (using_vnet_hdr
 * clear), the work is delegated to tap_receive_raw(), which prepends a
 * blank header.  Otherwise the buffer is written unchanged.
 *
 * Returns the result of tap_write_packet().
 */
static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    struct iovec payload;

    if (tap->host_vnet_hdr_len && !tap->using_vnet_hdr) {
        return tap_receive_raw(nc, buf, size);
    }

    payload.iov_base = (char *)buf;
    payload.iov_len = size;

    return tap_write_packet(tap, &payload, 1);
}
171

    
172
static int tap_can_send(void *opaque)
173
{
174
    TAPState *s = opaque;
175

    
176
    return qemu_can_send_packet(&s->nc);
177
}
178

    
179
#ifndef __sun__
/*
 * Read up to @maxlen bytes from @tapfd into @buf with a single read() call.
 *
 * Returns the read() result unchanged.  Only compiled when __sun__ is not
 * defined.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif
185

    
186
/*
 * Completion callback handed to qemu_send_packet_async() by tap_send().
 *
 * Re-enables read polling of the tap fd; @len is not used.
 */
static void tap_send_completed(VLANClientState *nc, ssize_t len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, 1);
}
191

    
192
static void tap_send(void *opaque)
193
{
194
    TAPState *s = opaque;
195
    int size;
196

    
197
    do {
198
        uint8_t *buf = s->buf;
199

    
200
        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
201
        if (size <= 0) {
202
            break;
203
        }
204

    
205
        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
206
            buf  += s->host_vnet_hdr_len;
207
            size -= s->host_vnet_hdr_len;
208
        }
209

    
210
        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
211
        if (size == 0) {
212
            tap_read_poll(s, 0);
213
        }
214
    } while (size > 0 && qemu_can_send_packet(&s->nc));
215
}
216

    
217
/*
 * Return the has_ufo flag recorded for this TAP client.
 *
 * @nc must be a TAP client (asserted).
 */
int tap_has_ufo(VLANClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->has_ufo;
}
225

    
226
/*
 * Return 1 if a non-zero host vnet header length is recorded for this
 * TAP client, 0 otherwise.
 *
 * @nc must be a TAP client (asserted).
 */
int tap_has_vnet_hdr(VLANClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->host_vnet_hdr_len != 0;
}
234

    
235
/*
 * Probe the tap fd for vnet header length @len.
 *
 * @nc must be a TAP client (asserted).  Returns the result of
 * tap_probe_vnet_hdr_len() for the client's fd.
 */
int tap_has_vnet_hdr_len(VLANClientState *nc, int len)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap_probe_vnet_hdr_len(tap->fd, len);
}
243

    
244
/*
 * Set the vnet header length used on the tap fd and record it in the
 * TAP state.
 *
 * @nc must be a TAP client and @len must be the size of either
 * struct virtio_net_hdr or struct virtio_net_hdr_mrg_rxbuf (both asserted).
 */
void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
    assert(len == sizeof(struct virtio_net_hdr) ||
           len == sizeof(struct virtio_net_hdr_mrg_rxbuf));

    tap_fd_set_vnet_hdr_len(tap->fd, len);
    tap->host_vnet_hdr_len = len;
}
255

    
256
/*
 * Record whether the vnet header on the tap fd is in use.
 *
 * @using_vnet_hdr is treated as a boolean.  @nc must be a TAP client, and
 * the requested state must agree with whether a host vnet header length is
 * configured (both asserted).
 */
void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int enabled = using_vnet_hdr ? 1 : 0;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
    assert((tap->host_vnet_hdr_len != 0) == enabled);

    tap->using_vnet_hdr = enabled;
}
267

    
268
/*
 * Forward the offload flags (@csum, @tso4, @tso6, @ecn, @ufo) to
 * tap_fd_set_offload() for the tap fd behind @nc.
 *
 * The helper is called as a plain statement: a return statement with an
 * expression is not allowed in a function returning void.
 */
void tap_set_offload(VLANClientState *nc, int csum, int tso4,
                     int tso6, int ecn, int ufo)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);

    tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
}
275

    
276
/*
 * cleanup callback: tear down a TAP client.
 *
 * In order: release the vhost-net state if present, purge the packets
 * queued for the client, run the down script (if one was configured) with
 * the stored argument, disable read and write polling, and close the fd.
 */
static void tap_cleanup(VLANClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    if (tap->vhost_net != NULL) {
        vhost_net_cleanup(tap->vhost_net);
        tap->vhost_net = NULL;
    }

    qemu_purge_queued_packets(nc);

    /* an empty down_script means none was configured */
    if (tap->down_script[0] != '\0') {
        launch_script(tap->down_script, tap->down_script_arg, tap->fd);
    }

    tap_read_poll(tap, 0);
    tap_write_poll(tap, 0);
    close(tap->fd);
}
294

    
295
/*
 * poll callback: apply @enable to both read and write polling of the
 * tap fd.
 */
static void tap_poll(VLANClientState *nc, bool enable)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, enable);
    tap_write_poll(tap, enable);
}
301

    
302
/*
 * Return the tap file descriptor of @nc, which must be a TAP client
 * (asserted).
 */
int tap_get_fd(VLANClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->fd;
}
308

    
309
/* fd support */
310

    
311
static NetClientInfo net_tap_info = {
312
    .type = NET_CLIENT_TYPE_TAP,
313
    .size = sizeof(TAPState),
314
    .receive = tap_receive,
315
    .receive_raw = tap_receive_raw,
316
    .receive_iov = tap_receive_iov,
317
    .poll = tap_poll,
318
    .cleanup = tap_cleanup,
319
};
320

    
321
/*
 * Create a TAP net client on @vlan for the already opened tap @fd.
 *
 * @model and @name are passed through to qemu_new_net_client().  A
 * non-zero @vnet_hdr records sizeof(struct virtio_net_hdr) as the host
 * vnet header length, otherwise 0.  The new client starts with
 * using_vnet_hdr cleared, all offloads switched off, no vhost-net state,
 * and read polling enabled.
 *
 * Returns the new TAPState.
 */
static TAPState *net_tap_fd_init(VLANState *vlan,
                                 const char *model,
                                 const char *name,
                                 int fd,
                                 int vnet_hdr)
{
    VLANClientState *client = qemu_new_net_client(&net_tap_info, vlan, NULL,
                                                  model, name);
    TAPState *tap = DO_UPCAST(TAPState, nc, client);

    tap->fd = fd;
    tap->vhost_net = NULL;
    tap->using_vnet_hdr = 0;
    if (vnet_hdr) {
        tap->host_vnet_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        tap->host_vnet_hdr_len = 0;
    }
    tap->has_ufo = tap_probe_has_ufo(tap->fd);

    tap_set_offload(&tap->nc, 0, 0, 0, 0, 0);
    tap_read_poll(tap, 1);

    return tap;
}
343

    
344
/*
 * Run @setup_script with @ifname as its only argument and wait for it.
 *
 * SIGCHLD is blocked while the child is forked and waited for.  The child
 * closes every descriptor except stdin, stdout, stderr and @fd before it
 * execs the script; if the exec fails it exits with status 1.
 *
 * The caller's signal mask is restored on every parent path, including a
 * failed fork().  waitpid() is retried only when interrupted (EINTR); any
 * other failure is reported as an error instead of being retried forever.
 *
 * Returns 0 if the script exited normally with status 0.  Otherwise prints
 * a message to stderr and returns -1.
 */
static int launch_script(const char *setup_script, const char *ifname, int fd)
{
    sigset_t oldmask, mask;
    int pid, status = 0;
    int reaped = 0;
    char *args[3];

    sigemptyset(&mask);
    sigaddset(&mask, SIGCHLD);
    sigprocmask(SIG_BLOCK, &mask, &oldmask);

    /* try to launch network script */
    pid = fork();
    if (pid == 0) {
        int open_max = sysconf(_SC_OPEN_MAX), i;

        for (i = 0; i < open_max; i++) {
            if (i != STDIN_FILENO &&
                i != STDOUT_FILENO &&
                i != STDERR_FILENO &&
                i != fd) {
                close(i);
            }
        }
        args[0] = (char *)setup_script;
        args[1] = (char *)ifname;
        args[2] = NULL;
        execv(setup_script, args);
        _exit(1);
    }

    if (pid > 0) {
        int ret;

        do {
            ret = waitpid(pid, &status, 0);
        } while (ret == -1 && errno == EINTR);
        reaped = (ret == pid);
    }

    /* parent only: undo the SIGCHLD block whether or not fork() worked */
    sigprocmask(SIG_SETMASK, &oldmask, NULL);

    if (reaped && WIFEXITED(status) && WEXITSTATUS(status) == 0) {
        return 0;
    }

    fprintf(stderr, "%s: could not launch network script\n", setup_script);
    return -1;
}
387

    
388
/*
 * Open a tap device according to @opts and run its setup script.
 *
 * The "ifname" option, if given, is the requested interface name.
 * *@vnet_hdr is set from the "vnet_hdr" option (default 1) and passed to
 * tap_open(); the header is treated as required only when the option was
 * given explicitly.  A "script" option that is set, non-empty and not
 * "no" is run through launch_script() with the interface name.  On
 * success the "ifname" option is set to the name left in the buffer by
 * tap_open().
 *
 * Returns the tap fd, or -1 if opening the device or running the script
 * failed (the fd is closed in the latter case).
 */
static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
{
    char ifname[128] = {0,};
    const char *wanted_ifname;
    const char *setup_script;
    int vnet_hdr_required = 0;
    int fd;

    wanted_ifname = qemu_opt_get(opts, "ifname");
    if (wanted_ifname) {
        pstrcpy(ifname, sizeof(ifname), wanted_ifname);
    }

    *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
    if (qemu_opt_get(opts, "vnet_hdr")) {
        vnet_hdr_required = *vnet_hdr;
    }

    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
    if (fd < 0) {
        return -1;
    }

    setup_script = qemu_opt_get(opts, "script");
    if (setup_script && setup_script[0] != '\0' &&
        strcmp(setup_script, "no") != 0) {
        if (launch_script(setup_script, ifname, fd)) {
            close(fd);
            return -1;
        }
    }

    qemu_opt_set(opts, "ifname", ifname);

    return fd;
}
423

    
424
/*
 * Create a TAP backend from the command line / monitor options in @opts.
 *
 * With "fd=", an already opened tap descriptor is taken over; ifname=,
 * script=, downscript= and vnet_hdr= are rejected in that case.  The
 * descriptor is switched to non-blocking mode and probed for vnet header
 * support.  Without "fd=", the device is opened by net_tap_init(), using
 * the default up/down scripts when none were given.
 *
 * The client is then created on @vlan under @name, its send buffer is
 * configured, its info string is filled in and, unless the down script is
 * "no", the down script and interface name are stored for tap_cleanup().
 * If "vhost" is enabled (it defaults to on when "vhostfd=" is given),
 * vhost-net is initialised as well.
 *
 * Returns 0 on success, -1 on error.
 *
 * NOTE(review): the error returns after net_tap_fd_init() has succeeded
 * leave the newly created client in place; no teardown call is visible in
 * this file - confirm and release the client there.
 */
int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
{
    TAPState *s;
    int fd, vnet_hdr = 0;

    if (qemu_opt_get(opts, "fd")) {
        int flags;

        if (qemu_opt_get(opts, "ifname") ||
            qemu_opt_get(opts, "script") ||
            qemu_opt_get(opts, "downscript") ||
            qemu_opt_get(opts, "vnet_hdr")) {
            error_report("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=");
            return -1;
        }

        fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
        if (fd == -1) {
            return -1;
        }

        /*
         * Add O_NONBLOCK to the flags the descriptor already has instead
         * of overwriting them.  If the flags cannot be read, fall back to
         * setting O_NONBLOCK alone.
         */
        flags = fcntl(fd, F_GETFL);
        if (flags == -1) {
            flags = 0;
        }
        fcntl(fd, F_SETFL, flags | O_NONBLOCK);

        vnet_hdr = tap_probe_vnet_hdr(fd);
    } else {
        if (!qemu_opt_get(opts, "script")) {
            qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
        }

        if (!qemu_opt_get(opts, "downscript")) {
            qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
        }

        fd = net_tap_init(opts, &vnet_hdr);
        if (fd == -1) {
            return -1;
        }
    }

    s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
    if (!s) {
        close(fd);
        return -1;
    }

    if (tap_set_sndbuf(s->fd, opts) < 0) {
        return -1;
    }

    if (qemu_opt_get(opts, "fd")) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
    } else {
        const char *ifname, *script, *downscript;

        ifname     = qemu_opt_get(opts, "ifname");
        script     = qemu_opt_get(opts, "script");
        downscript = qemu_opt_get(opts, "downscript");

        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                 "ifname=%s,script=%s,downscript=%s",
                 ifname, script, downscript);

        /* remember what tap_cleanup() has to run when the client goes away */
        if (strcmp(downscript, "no") != 0) {
            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
            snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
        }
    }

    if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd"))) {
        int vhostfd, r;

        if (qemu_opt_get(opts, "vhostfd")) {
            r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
            if (r == -1) {
                return -1;
            }
            vhostfd = r;
        } else {
            /* no descriptor supplied: pass -1 to vhost_net_init() */
            vhostfd = -1;
        }
        s->vhost_net = vhost_net_init(&s->nc, vhostfd);
        if (!s->vhost_net) {
            error_report("vhost-net requested but could not be initialized");
            return -1;
        }
    } else if (qemu_opt_get(opts, "vhostfd")) {
        error_report("vhostfd= is not valid without vhost");
        return -1;
    }

    return 0;
}
513

    
514
/*
 * Return the vhost-net state attached to @nc, or NULL if there is none.
 * @nc must be a TAP client (asserted).
 */
VHostNetState *tap_get_vhost_net(VLANClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_TYPE_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->vhost_net;
}