root / net / tap.c @ 966ea5ec
History | View | Annotate | Download (14 kB)
1 |
/*
|
---|---|
2 |
* QEMU System Emulator
|
3 |
*
|
4 |
* Copyright (c) 2003-2008 Fabrice Bellard
|
5 |
* Copyright (c) 2009 Red Hat, Inc.
|
6 |
*
|
7 |
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
8 |
* of this software and associated documentation files (the "Software"), to deal
|
9 |
* in the Software without restriction, including without limitation the rights
|
10 |
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11 |
* copies of the Software, and to permit persons to whom the Software is
|
12 |
* furnished to do so, subject to the following conditions:
|
13 |
*
|
14 |
* The above copyright notice and this permission notice shall be included in
|
15 |
* all copies or substantial portions of the Software.
|
16 |
*
|
17 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
18 |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
19 |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
20 |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
21 |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
22 |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
23 |
* THE SOFTWARE.
|
24 |
*/
|
25 |
|
26 |
#include "net/tap.h" |
27 |
|
28 |
#include "config-host.h" |
29 |
|
30 |
#include <signal.h> |
31 |
#include <sys/ioctl.h> |
32 |
#include <sys/stat.h> |
33 |
#include <sys/wait.h> |
34 |
#include <net/if.h> |
35 |
|
36 |
#include "net.h" |
37 |
#include "sysemu.h" |
38 |
#include "qemu-char.h" |
39 |
#include "qemu-common.h" |
40 |
|
41 |
#ifdef __linux__
|
42 |
#include "net/tap-linux.h" |
43 |
#endif
|
44 |
|
45 |
#if !defined(_AIX)
|
46 |
|
47 |
/*
 * Size of the packet buffer: the maximum GSO packet size (64 KiB) plus
 * 4 KiB of headroom for the ethernet and virtio_net headers.
 */
#define TAP_BUFSIZE (64 * 1024 + 4 * 1024)
51 |
|
52 |
typedef struct TAPState { |
53 |
VLANClientState *vc; |
54 |
int fd;
|
55 |
char down_script[1024]; |
56 |
char down_script_arg[128]; |
57 |
uint8_t buf[TAP_BUFSIZE]; |
58 |
unsigned int read_poll : 1; |
59 |
unsigned int write_poll : 1; |
60 |
unsigned int has_vnet_hdr : 1; |
61 |
unsigned int using_vnet_hdr : 1; |
62 |
unsigned int has_ufo: 1; |
63 |
} TAPState; |
64 |
|
65 |
static int launch_script(const char *setup_script, const char *ifname, int fd); |
66 |
|
67 |
static int tap_can_send(void *opaque); |
68 |
static void tap_send(void *opaque); |
69 |
static void tap_writable(void *opaque); |
70 |
|
71 |
/*
 * Re-register the handlers for s->fd from the current poll flags:
 * tap_can_send/tap_send are installed only while read_poll is set and
 * tap_writable only while write_poll is set; NULL is passed otherwise.
 */
static void tap_update_fd_handler(TAPState *s)
{
    int (*can_send_cb)(void *) = NULL;
    void (*send_cb)(void *) = NULL;
    void (*writable_cb)(void *) = NULL;

    if (s->read_poll) {
        can_send_cb = tap_can_send;
        send_cb = tap_send;
    }
    if (s->write_poll) {
        writable_cb = tap_writable;
    }

    qemu_set_fd_handler2(s->fd, can_send_cb, send_cb, writable_cb, s);
}
79 |
|
80 |
/* Turn read polling on (enable != 0) or off and refresh the fd handlers. */
static void tap_read_poll(TAPState *s, int enable)
{
    s->read_poll = (enable != 0);
    tap_update_fd_handler(s);
}
85 |
|
86 |
/* Turn write polling on (enable != 0) or off and refresh the fd handlers. */
static void tap_write_poll(TAPState *s, int enable)
{
    s->write_poll = (enable != 0);
    tap_update_fd_handler(s);
}
91 |
|
92 |
static void tap_writable(void *opaque) |
93 |
{ |
94 |
TAPState *s = opaque; |
95 |
|
96 |
tap_write_poll(s, 0);
|
97 |
|
98 |
qemu_flush_queued_packets(s->vc); |
99 |
} |
100 |
|
101 |
/*
 * Write the iovec array to the tap fd with writev(), retrying while the
 * call is interrupted (EINTR).
 *
 * Returns writev()'s result, except that EAGAIN enables write polling
 * and is reported as 0.
 */
static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
{
    ssize_t written;

    for (;;) {
        written = writev(s->fd, iov, iovcnt);
        if (written != -1 || errno != EINTR) {
            break;
        }
    }

    if (written != -1 || errno != EAGAIN) {
        return written;
    }

    /* The fd cannot take the packet right now: wait until it is writable. */
    tap_write_poll(s, 1);
    return 0;
}
116 |
|
117 |
/*
 * Write a scattered packet to the tap device.
 *
 * When the device has a vnet header that is not in use
 * (has_vnet_hdr && !using_vnet_hdr), a zeroed struct virtio_net_hdr is
 * placed in front of the caller's vectors; otherwise they are written
 * unchanged.  Returns the result of tap_write_packet().
 */
static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov,
                               int iovcnt)
{
    TAPState *s = vc->opaque;
    struct iovec iov_copy[iovcnt + 1];
    struct virtio_net_hdr hdr = { 0, };

    if (!s->has_vnet_hdr || s->using_vnet_hdr) {
        return tap_write_packet(s, iov, iovcnt);
    }

    /* Slot 0 carries the zeroed header, the caller's vectors follow it. */
    iov_copy[0].iov_base = &hdr;
    iov_copy[0].iov_len = sizeof(hdr);
    memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));

    return tap_write_packet(s, iov_copy, iovcnt + 1);
}
135 |
|
136 |
/*
 * Write one contiguous packet to the tap device, preceded by a zeroed
 * struct virtio_net_hdr whenever the device has a vnet header.
 * Returns the result of tap_write_packet().
 */
static ssize_t tap_receive_raw(VLANClientState *vc, const uint8_t *buf, size_t size)
{
    TAPState *s = vc->opaque;
    struct virtio_net_hdr hdr = { 0, };
    struct iovec iov[2];
    struct iovec *next = iov;

    if (s->has_vnet_hdr) {
        next->iov_base = &hdr;
        next->iov_len = sizeof(hdr);
        next++;
    }

    next->iov_base = (char *)buf;
    next->iov_len = size;
    next++;

    return tap_write_packet(s, iov, (int)(next - iov));
}
155 |
|
156 |
/*
 * Write one contiguous packet to the tap device.  When the device has a
 * vnet header that is not in use, the work is delegated to
 * tap_receive_raw(), which prepends the header; otherwise the buffer is
 * written as is.
 */
static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
{
    TAPState *s = vc->opaque;
    struct iovec whole = { .iov_base = (char *)buf, .iov_len = size };

    if (s->has_vnet_hdr && !s->using_vnet_hdr) {
        return tap_receive_raw(vc, buf, size);
    }

    return tap_write_packet(s, &whole, 1);
}
170 |
|
171 |
static int tap_can_send(void *opaque) |
172 |
{ |
173 |
TAPState *s = opaque; |
174 |
|
175 |
return qemu_can_send_packet(s->vc);
|
176 |
} |
177 |
|
178 |
#ifndef __sun__
|
179 |
/*
 * Read at most maxlen bytes from tapfd into buf.
 * Returns whatever read() returns.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
183 |
#endif
|
184 |
|
185 |
/*
 * Completion callback handed to qemu_send_packet_async() by tap_send():
 * switches read polling back on.  len is unused.
 */
static void tap_send_completed(VLANClientState *vc, ssize_t len)
{
    tap_read_poll(vc->opaque, 1);
}
190 |
|
191 |
static void tap_send(void *opaque) |
192 |
{ |
193 |
TAPState *s = opaque; |
194 |
int size;
|
195 |
|
196 |
do {
|
197 |
uint8_t *buf = s->buf; |
198 |
|
199 |
size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
|
200 |
if (size <= 0) { |
201 |
break;
|
202 |
} |
203 |
|
204 |
if (s->has_vnet_hdr && !s->using_vnet_hdr) {
|
205 |
buf += sizeof(struct virtio_net_hdr); |
206 |
size -= sizeof(struct virtio_net_hdr); |
207 |
} |
208 |
|
209 |
size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed); |
210 |
if (size == 0) { |
211 |
tap_read_poll(s, 0);
|
212 |
} |
213 |
} while (size > 0); |
214 |
} |
215 |
|
216 |
/*
 * sndbuf should be set to a value lower than the tx queue capacity of
 * any destination network interface.  Ethernet NICs generally have
 * txqueuelen=1000, so 1 MiB is a good default, given a 1500 byte MTU.
 *
 * The expansion is parenthesized so the macro stays a single value when
 * it appears inside a larger expression.
 */
#define TAP_DEFAULT_SNDBUF (1024 * 1024)
222 |
|
223 |
/*
 * Apply the "sndbuf" option to the tap fd through TUNSETSNDBUF.
 *
 * The value defaults to TAP_DEFAULT_SNDBUF; a value of 0 is replaced by
 * INT_MAX.  An ioctl failure is an error only when the user gave
 * "sndbuf" explicitly; with the default it is ignored.
 *
 * Returns 0 on success (or ignored failure), -1 on error.
 */
static int tap_set_sndbuf(TAPState *s, QemuOpts *opts)
{
    int sndbuf = qemu_opt_get_size(opts, "sndbuf", TAP_DEFAULT_SNDBUF);

    if (sndbuf == 0) {
        sndbuf = INT_MAX;
    }

    if (ioctl(s->fd, TUNSETSNDBUF, &sndbuf) != -1) {
        return 0;
    }
    if (!qemu_opt_get(opts, "sndbuf")) {
        return 0;
    }

    qemu_error("TUNSETSNDBUF ioctl failed: %s\n", strerror(errno));
    return -1;
}
238 |
|
239 |
/*
 * Return the has_ufo flag of a tap client.
 * Asserts that vc is a NET_CLIENT_TYPE_TAP client.
 */
int tap_has_ufo(VLANClientState *vc)
{
    assert(vc->type == NET_CLIENT_TYPE_TAP);

    return ((TAPState *)vc->opaque)->has_ufo;
}
247 |
|
248 |
/*
 * Return the has_vnet_hdr flag of a tap client.
 * Asserts that vc is a NET_CLIENT_TYPE_TAP client.
 */
int tap_has_vnet_hdr(VLANClientState *vc)
{
    assert(vc->type == NET_CLIENT_TYPE_TAP);

    return ((TAPState *)vc->opaque)->has_vnet_hdr;
}
256 |
|
257 |
/*
 * Record whether the vnet header is in use on this tap client.
 *
 * Asserts that vc is a NET_CLIENT_TYPE_TAP client and that the
 * requested setting equals the device's has_vnet_hdr flag.
 */
void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr)
{
    TAPState *s = vc->opaque;

    /* Collapse any non-zero value to 1 so it compares with the 1-bit field. */
    using_vnet_hdr = using_vnet_hdr ? 1 : 0;

    assert(vc->type == NET_CLIENT_TYPE_TAP);
    assert(s->has_vnet_hdr == using_vnet_hdr);

    s->using_vnet_hdr = using_vnet_hdr;
}
268 |
|
269 |
static int tap_probe_vnet_hdr(int fd) |
270 |
{ |
271 |
struct ifreq ifr;
|
272 |
|
273 |
if (ioctl(fd, TUNGETIFF, &ifr) != 0) { |
274 |
qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno));
|
275 |
return 0; |
276 |
} |
277 |
|
278 |
return ifr.ifr_flags & IFF_VNET_HDR;
|
279 |
} |
280 |
|
281 |
/*
 * Program the offload features of the tap device through TUNSETOFFLOAD.
 *
 * No feature is requested unless csum is set.  With csum, TSO4, TSO6
 * and UFO follow their flags, and TSO_ECN is added only when ecn is set
 * together with at least one of tso4/tso6.
 *
 * If the ioctl fails, it is retried once without TUN_F_UFO; a second
 * failure is reported on stderr.
 */
void tap_set_offload(VLANClientState *vc, int csum, int tso4,
                     int tso6, int ecn, int ufo)
{
    TAPState *s = vc->opaque;
    unsigned int offload = 0;

    if (csum) {
        offload |= TUN_F_CSUM;
        if (tso4) {
            offload |= TUN_F_TSO4;
        }
        if (tso6) {
            offload |= TUN_F_TSO6;
        }
        if (ecn && (tso4 || tso6)) {
            offload |= TUN_F_TSO_ECN;
        }
        if (ufo) {
            offload |= TUN_F_UFO;
        }
    }

    if (ioctl(s->fd, TUNSETOFFLOAD, offload) == 0) {
        return;
    }

    /* Retry without UFO before giving up. */
    offload &= ~TUN_F_UFO;
    if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
        fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
                strerror(errno));
    }
}
307 |
|
308 |
/*
 * Cleanup callback of the tap VLAN client: drop queued packets, run the
 * down script if one was configured, unregister the fd handlers, close
 * the tap fd and free the TAPState.
 */
static void tap_cleanup(VLANClientState *vc)
{
    TAPState *tap = vc->opaque;

    qemu_purge_queued_packets(vc);

    /* An empty down_script means no script was configured. */
    if (tap->down_script[0] != '\0') {
        launch_script(tap->down_script, tap->down_script_arg, tap->fd);
    }

    tap_read_poll(tap, 0);
    tap_write_poll(tap, 0);

    close(tap->fd);
    qemu_free(tap);
}
322 |
|
323 |
/* fd support */
|
324 |
|
325 |
/*
 * Build the TAPState for an already opened tap fd.
 *
 * Creates the VLAN client with the tap receive/cleanup callbacks,
 * records whether the device has a vnet header, probes UFO support by
 * attempting TUNSETOFFLOAD with TUN_F_CSUM | TUN_F_UFO, then clears all
 * offloads again and enables read polling.
 *
 * Returns the new state.
 */
static TAPState *net_tap_fd_init(VLANState *vlan,
                                 const char *model,
                                 const char *name,
                                 int fd,
                                 int vnet_hdr)
{
    const unsigned int ufo_probe = TUN_F_CSUM | TUN_F_UFO;
    TAPState *s = qemu_mallocz(sizeof(*s));

    s->fd = fd;
    s->has_vnet_hdr = (vnet_hdr != 0);
    s->using_vnet_hdr = 0;
    s->vc = qemu_new_vlan_client(NET_CLIENT_TYPE_TAP,
                                 vlan, NULL, model, name, NULL,
                                 tap_receive, tap_receive_raw,
                                 tap_receive_iov, tap_cleanup, s);

    /* Check if tap supports UFO */
    s->has_ufo = (ioctl(s->fd, TUNSETOFFLOAD, ufo_probe) == 0);

    /* Leave the device with every offload switched off. */
    tap_set_offload(s->vc, 0, 0, 0, 0, 0);
    tap_read_poll(s, 1);

    return s;
}
351 |
|
352 |
#ifdef _AIX
|
353 |
/*
 * AIX stub: tap devices are not available, so report that on stderr and
 * fail with -1.  None of the parameters is touched.
 *
 * NOTE(review): this #ifdef _AIX branch sits inside the file-wide
 * #if !defined(_AIX) region, so it looks unreachable - confirm.
 */
int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
{
    fputs("no tap on AIX\n", stderr);

    return -1;
}
358 |
#else
|
359 |
int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required) |
360 |
{ |
361 |
struct ifreq ifr;
|
362 |
int fd, ret;
|
363 |
|
364 |
TFR(fd = open("/dev/net/tun", O_RDWR));
|
365 |
if (fd < 0) { |
366 |
fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
|
367 |
return -1; |
368 |
} |
369 |
memset(&ifr, 0, sizeof(ifr)); |
370 |
ifr.ifr_flags = IFF_TAP | IFF_NO_PI; |
371 |
|
372 |
if (*vnet_hdr) {
|
373 |
unsigned int features; |
374 |
|
375 |
if (ioctl(fd, TUNGETFEATURES, &features) == 0 && |
376 |
features & IFF_VNET_HDR) { |
377 |
*vnet_hdr = 1;
|
378 |
ifr.ifr_flags |= IFF_VNET_HDR; |
379 |
} |
380 |
|
381 |
if (vnet_hdr_required && !*vnet_hdr) {
|
382 |
qemu_error("vnet_hdr=1 requested, but no kernel "
|
383 |
"support for IFF_VNET_HDR available");
|
384 |
close(fd); |
385 |
return -1; |
386 |
} |
387 |
} |
388 |
|
389 |
if (ifname[0] != '\0') |
390 |
pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname); |
391 |
else
|
392 |
pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d");
|
393 |
ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
|
394 |
if (ret != 0) { |
395 |
fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n");
|
396 |
close(fd); |
397 |
return -1; |
398 |
} |
399 |
pstrcpy(ifname, ifname_size, ifr.ifr_name); |
400 |
fcntl(fd, F_SETFL, O_NONBLOCK); |
401 |
return fd;
|
402 |
} |
403 |
#endif
|
404 |
|
405 |
static int launch_script(const char *setup_script, const char *ifname, int fd) |
406 |
{ |
407 |
sigset_t oldmask, mask; |
408 |
int pid, status;
|
409 |
char *args[3]; |
410 |
char **parg;
|
411 |
|
412 |
sigemptyset(&mask); |
413 |
sigaddset(&mask, SIGCHLD); |
414 |
sigprocmask(SIG_BLOCK, &mask, &oldmask); |
415 |
|
416 |
/* try to launch network script */
|
417 |
pid = fork(); |
418 |
if (pid == 0) { |
419 |
int open_max = sysconf(_SC_OPEN_MAX), i;
|
420 |
|
421 |
for (i = 0; i < open_max; i++) { |
422 |
if (i != STDIN_FILENO &&
|
423 |
i != STDOUT_FILENO && |
424 |
i != STDERR_FILENO && |
425 |
i != fd) { |
426 |
close(i); |
427 |
} |
428 |
} |
429 |
parg = args; |
430 |
*parg++ = (char *)setup_script;
|
431 |
*parg++ = (char *)ifname;
|
432 |
*parg++ = NULL;
|
433 |
execv(setup_script, args); |
434 |
_exit(1);
|
435 |
} else if (pid > 0) { |
436 |
while (waitpid(pid, &status, 0) != pid) { |
437 |
/* loop */
|
438 |
} |
439 |
sigprocmask(SIG_SETMASK, &oldmask, NULL);
|
440 |
|
441 |
if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { |
442 |
return 0; |
443 |
} |
444 |
} |
445 |
fprintf(stderr, "%s: could not launch network script\n", setup_script);
|
446 |
return -1; |
447 |
} |
448 |
|
449 |
/*
 * Open a tap device from the "ifname", "vnet_hdr" and "script" options.
 *
 * *vnet_hdr is set from the "vnet_hdr" option (default 1) and may be
 * updated by tap_open(); the header is treated as required only when
 * the option was given explicitly.  After the device is open, the setup
 * script is run unless it is unset, empty or "no".  The interface name
 * in use is stored back into the "ifname" option.
 *
 * Returns the tap fd, or -1 when opening the device or running the
 * script failed.
 */
static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
{
    char ifname[128] = {0,};
    const char *requested_name = qemu_opt_get(opts, "ifname");
    const char *setup_script;
    int vnet_hdr_required;
    int run_script;
    int fd;

    if (requested_name) {
        pstrcpy(ifname, sizeof(ifname), requested_name);
    }

    *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
    vnet_hdr_required = qemu_opt_get(opts, "vnet_hdr") ? *vnet_hdr : 0;

    TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
    if (fd < 0) {
        return -1;
    }

    setup_script = qemu_opt_get(opts, "script");
    run_script = setup_script &&
                 setup_script[0] != '\0' &&
                 strcmp(setup_script, "no") != 0;

    if (run_script && launch_script(setup_script, ifname, fd)) {
        close(fd);
        return -1;
    }

    qemu_opt_set(opts, "ifname", ifname);

    return fd;
}
484 |
|
485 |
/*
 * Create a tap network client from the given options.
 *
 * With "fd=", the descriptor obtained through net_handle_fd_param() is
 * used; "ifname", "script", "downscript" and "vnet_hdr" are rejected in
 * that mode and the vnet header setting is probed from the fd.
 * Otherwise a tap device is opened through net_tap_init(), with
 * "script" and "downscript" defaulting to DEFAULT_NETWORK_SCRIPT and
 * DEFAULT_NETWORK_DOWN_SCRIPT.
 *
 * On success the client's info string is filled in, the down script is
 * recorded unless it is "no", and vlan->nb_host_devs is incremented
 * when a vlan was given.
 *
 * Returns 0 on success, -1 on failure.
 */
int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
{
    TAPState *s;
    int fd, vnet_hdr;

    if (qemu_opt_get(opts, "fd")) {
        if (qemu_opt_get(opts, "ifname") ||
            qemu_opt_get(opts, "script") ||
            qemu_opt_get(opts, "downscript") ||
            qemu_opt_get(opts, "vnet_hdr")) {
            qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
            return -1;
        }

        fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
        if (fd == -1) {
            return -1;
        }

        fcntl(fd, F_SETFL, O_NONBLOCK);

        vnet_hdr = tap_probe_vnet_hdr(fd);
    } else {
        if (!qemu_opt_get(opts, "script")) {
            qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
        }

        if (!qemu_opt_get(opts, "downscript")) {
            qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
        }

        fd = net_tap_init(opts, &vnet_hdr);
        if (fd < 0) {
            /* Opening the device or running the script failed. */
            return -1;
        }
    }

    s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
    if (!s) {
        close(fd);
        return -1;
    }

    if (tap_set_sndbuf(s, opts) < 0) {
        /*
         * NOTE(review): neither s nor fd is released on this path -
         * confirm whether the caller tears the client down.
         */
        return -1;
    }

    if (qemu_opt_get(opts, "fd")) {
        snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd);
    } else {
        const char *ifname, *script, *downscript;

        ifname = qemu_opt_get(opts, "ifname");
        script = qemu_opt_get(opts, "script");
        downscript = qemu_opt_get(opts, "downscript");

        snprintf(s->vc->info_str, sizeof(s->vc->info_str),
                 "ifname=%s,script=%s,downscript=%s",
                 ifname, script, downscript);

        /* "no" disables the down script; tap_cleanup() then runs nothing. */
        if (strcmp(downscript, "no") != 0) {
            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
            snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
        }
    }

    if (vlan) {
        vlan->nb_host_devs++;
    }

    return 0;
}
554 |
|
555 |
#endif /* !defined(_AIX) */ |