# nfdhcpd: A promiscuous, NFQUEUE-based DHCP server for virtual machine hosting
5 # Copyright (c) 2010 GRNET SA
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 import logging.handlers
38 from select import select
39 from socket import AF_INET, AF_INET6
41 from scapy.data import ETH_P_ALL
42 from scapy.packet import BasePacket
43 from scapy.layers.l2 import Ether
44 from scapy.layers.inet import IP, UDP
45 from scapy.layers.inet6 import IPv6, ICMPv6ND_RA, ICMPv6ND_NA, \
46 ICMPv6NDOptDstLLAddr, \
47 ICMPv6NDOptPrefixInfo, \
49 from scapy.layers.dhcp import BOOTP, DHCP
51 DEFAULT_CONFIG = "/etc/nfdhcpd/nfdhcpd.conf"
52 DEFAULT_PATH = "/var/run/ganeti-dhcpd"
53 DEFAULT_USER = "nobody"
54 DEFAULT_LEASE_LIFETIME = 604800 # 1 week
55 DEFAULT_LEASE_RENEWAL = 600 # 10 min
56 DEFAULT_RA_PERIOD = 300 # seconds
57 DHCP_DUMMY_SERVER_IP = "1.2.3.4"
59 LOG_FILENAME = "nfdhcpd.log"
61 SYSFS_NET = "/sys/class/net"
63 LOG_FORMAT = "%(asctime)-15s %(levelname)-6s %(message)s"
65 # Configuration file specification (see configobj documentation)
74 enable_dhcp = boolean(default=True)
75 lease_lifetime = integer(min=0, max=4294967295)
76 lease_renewal = integer(min=0, max=4294967295)
78 dhcp_queue = integer(min=0, max=65535)
79 nameservers = ip_addr_list(family=4)
82 enable_ipv6 = boolean(default=True)
83 ra_period = integer(min=1, max=4294967295)
84 rs_queue = integer(min=0, max=65535)
85 ns_queue = integer(min=0, max=65535)
86 nameservers = ip_addr_list(family=6)
100 DHCPDISCOVER: "DHCPDISCOVER",
101 DHCPOFFER: "DHCPOFFER",
102 DHCPREQUEST: "DHCPREQUEST",
103 DHCPDECLINE: "DHCPDECLINE",
106 DHCPRELEASE: "DHCPRELEASE",
107 DHCPINFORM: "DHCPINFORM",
111 DHCPDISCOVER: DHCPOFFER,
112 DHCPREQUEST: DHCPACK,
def parse_routing_table(table="main", family=4):
    """ Parse the given routing table to get connected route, gateway and
    default device.

    The table is read by running "ip -<family> ro ls table <table>". The
    gateway and the device are taken from the first entry of the form
    "default ... via GW ... dev DEV"; the network is taken from the entries
    of the form "NET dev DEV ..." that refer to that same device.

    @param table: routing table to inspect, as understood by ip(8)
    @param family: IP protocol family to query (4 or 6)
    @return: a Subnet(net, gw, dev); components that could not be
             determined are None

    """
    ipro = subprocess.Popen(["ip", "-%d" % family, "ro", "ls",
                             "table", table], stdout=subprocess.PIPE)
    # communicate() reads the output up to EOF, closes the pipe and reaps
    # the child, so neither a zombie nor an open descriptor is left behind
    routes = ipro.communicate()[0].splitlines()

    def_gw = None
    def_dev = None
    def_net = None

    for route in routes:
        match = re.match(r'^default.*via ([^\s]+).*dev ([^\s]+)', route)
        if match:
            def_gw, def_dev = match.groups()
            break

    if def_dev is None:
        # Without a default route there is no device to look up a connected
        # route for (and "dev None" must not be used as a pattern)
        return Subnet(net=def_net, gw=def_gw, dev=def_dev)

    # Escape and anchor the device name, so that "eth1" does not match
    # "eth10" and is never interpreted as a regular expression itself
    connected = re.compile(r'^([^\s]+) dev %s(?:\s|$)' % re.escape(def_dev))

    for route in routes:
        # Find the least-specific connected route: the last parseable match
        # wins. NOTE(review): this relies on the order in which ip lists the
        # routes - confirm
        m = connected.match(route)
        if not m:
            continue

        candidate = m.group(1)
        try:
            def_net = IPy.IP(candidate)
        except ValueError as e:
            # Keep the previous result; a raw string must never end up in
            # def_net, as Subnet would try to parse it again and fail
            logging.warn("Unable to parse default route entry %s: %s",
                         candidate, str(e))

    return Subnet(net=def_net, gw=def_gw, dev=def_dev)
def parse_binding_file(path):
    """ Read a client configuration from a tap file

    Lines starting with IP=, MAC=, LINK= and HOSTNAME= are recognised; all
    other lines are ignored. The IP value is split on whitespace into a
    list of addresses. If a key appears more than once, the last
    occurrence wins.

    @param path: path of the binding file to read
    @return: a Client built from the values found (missing ones are None).
             If the file cannot be opened an empty Client is returned, so
             callers can always rely on is_valid()

    """
    try:
        iffile = open(path, 'r')
    except EnvironmentError as e:
        logging.warn("Unable to open binding file %s: %s", path, str(e))
        # An empty Client reports is_valid() == False; returning a tuple
        # here would break callers that expect a Client
        return Client()

    fields = {"IP": None, "MAC": None, "LINK": None, "HOSTNAME": None}
    prefixes = tuple(key + "=" for key in fields)

    # The with block closes the file even if reading fails half-way
    with iffile:
        for line in iffile:
            if not line.startswith(prefixes):
                continue
            # Split on the first "=" only, so that values may contain "="
            key, value = line.strip().split("=", 1)
            fields[key] = value

    ips = fields["IP"]
    if ips is not None:
        ips = ips.split()

    return Client(mac=fields["MAC"], ips=ips, link=fields["LINK"],
                  hostname=fields["HOSTNAME"])
class ClientFileHandler(pyinotify.ProcessEvent):
    """ Inotify event handler that forwards binding file changes to a server

    """
    def __init__(self, server):
        """ Remember the server to notify

        @param server: object providing add_iface() and remove_iface()

        """
        pyinotify.ProcessEvent.__init__(self)
        self.server = server

    def process_IN_DELETE(self, event): # pylint: disable=C0103
        """ Delete file handler

        Currently this removes an interface from the watch list; the name
        of the deleted file is used as the interface name

        """
        removed = event.name
        self.server.remove_iface(removed)

    def process_IN_CLOSE_WRITE(self, event): # pylint: disable=C0103
        """ Closed-after-write file handler

        Currently this adds an interface to the watch list; the server is
        handed the full path of the file that was written

        """
        binding_path = os.path.join(event.path, event.name)
        self.server.add_iface(binding_path)
class Client(object):
    """ A client (VM network interface) and its statically bound settings

    """
    def __init__(self, mac=None, ips=None, link=None, hostname=None):
        """ Store the binding

        @param mac: hardware address of the client
        @param ips: list of IP addresses bound to the client
        @param link: name of the link the client is attached to
        @param hostname: hostname of the client

        """
        self.mac = mac
        self.ips = ips
        self.hostname = hostname
        self.link = link
        # Set by whoever registers the client on an interface
        self.iface = None

    @property
    def ip(self):
        """ Return the primary (first) IP address of the client

        """
        return self.ips[0]

    def is_valid(self):
        """ Tell whether the binding is complete enough to be served

        A MAC address, a hostname and at least one IP address are needed.
        An empty address list is rejected as well, since the ip property
        could not be evaluated for such a client.

        """
        return self.mac is not None and bool(self.ips)\
               and self.hostname is not None
class Subnet(object):
    """ A network together with its gateway and the device it lives on

    """
    def __init__(self, net=None, gw=None, dev=None):
        """ Store the subnet description

        @param net: the network, either as a string or as an IPy.IP
        @param gw: the gateway of the network
        @param dev: the device the network is reachable through

        """
        # Strings are parsed, anything else is stored untouched
        self.net = IPy.IP(net) if isinstance(net, str) else net
        self.gw = gw
        self.dev = dev

    @property
    def netmask(self):
        """ Return the netmask in textual representation

        """
        mask = self.net.netmask()
        return str(mask)

    @property
    def broadcast(self):
        """ Return the broadcast address in textual representation

        """
        bcast = self.net.broadcast()
        return str(bcast)

    @property
    def prefix(self):
        """ Return the network as an IPy.IP

        """
        return self.net.net()

    @property
    def prefixlen(self):
        """ Return the prefix length as an integer

        """
        return self.net.prefixlen()

    @staticmethod
    def _make_eui64(net, mac):
        """ Compute an EUI-64 address from an EUI-48 (MAC) address

        @param net: network providing the upper 64 bits of the address
        @param mac: colon-separated MAC address
        @return: the resulting address as an IPy.IP

        """
        octets = mac.split(":")
        # The first four 16-bit groups of the fully expanded network
        groups = IPy.IP(net).net().strFullsize().split(":")[:4]
        # Insert ff:fe in the middle of the MAC and flip the
        # universal/local bit of its first octet
        iid = octets[:3] + ["ff", "fe"] + octets[3:]
        iid[0] = "%02x" % (int(iid[0], 16) ^ 0x02)
        # Glue the octets together pairwise into 16-bit groups
        groups.extend("".join(iid[pos:pos + 2])
                      for pos in range(0, len(iid), 2))
        return IPy.IP(":".join(groups))

    def make_eui64(self, mac):
        """ Compute an EUI-64 address from an EUI-48 (MAC) address in this
        subnet.

        """
        return self._make_eui64(self.net, mac)

    def make_ll64(self, mac):
        """ Compute an IPv6 Link-local address from an EUI-48 (MAC) address

        """
        return self._make_eui64("fe80::", mac)
284 class VMNetProxy(object): # pylint: disable=R0902
285 def __init__(self, data_path, dhcp_queue_num=None, # pylint: disable=R0913
286 rs_queue_num=None, ns_queue_num=None,
287 dhcp_lease_lifetime=DEFAULT_LEASE_LIFETIME,
288 dhcp_lease_renewal=DEFAULT_LEASE_RENEWAL,
289 dhcp_server_ip=DHCP_DUMMY_SERVER_IP, dhcp_nameservers=None,
290 ra_period=DEFAULT_RA_PERIOD, ipv6_nameservers=None):
292 self.data_path = data_path
293 self.lease_lifetime = dhcp_lease_lifetime
294 self.lease_renewal = dhcp_lease_renewal
295 self.dhcp_server_ip = dhcp_server_ip
296 self.ra_period = ra_period
297 if dhcp_nameservers is None:
298 self.dhcp_nameserver = []
300 self.dhcp_nameservers = dhcp_nameservers
302 if ipv6_nameservers is None:
303 self.ipv6_nameservers = []
305 self.ipv6_nameservers = ipv6_nameservers
307 self.ipv6_enabled = False
314 self.l2socket = socket.socket(socket.AF_PACKET,
315 socket.SOCK_RAW, ETH_P_ALL)
316 self.l2socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 0)
319 self.wm = pyinotify.WatchManager()
320 mask = pyinotify.EventsCodes.ALL_FLAGS["IN_DELETE"]
321 mask |= pyinotify.EventsCodes.ALL_FLAGS["IN_CLOSE_WRITE"]
322 inotify_handler = ClientFileHandler(self)
323 self.notifier = pyinotify.Notifier(self.wm, inotify_handler)
324 self.wm.add_watch(self.data_path, mask, rec=True)
327 if dhcp_queue_num is not None:
328 self._setup_nfqueue(dhcp_queue_num, AF_INET, self.dhcp_response)
330 if rs_queue_num is not None:
331 self._setup_nfqueue(rs_queue_num, AF_INET6, self.rs_response)
332 self.ipv6_enabled = True
334 if ns_queue_num is not None:
335 self._setup_nfqueue(ns_queue_num, AF_INET6, self.ns_response)
336 self.ipv6_enabled = True
338 def _setup_nfqueue(self, queue_num, family, callback):
339 logging.debug("Setting up NFQUEUE for queue %d, AF %s",
342 q.set_callback(callback)
343 q.fast_open(queue_num, family)
344 q.set_queue_maxlen(5000)
345 # This is mandatory for the queue to operate
346 q.set_mode(nfqueue.NFQNL_COPY_PACKET)
347 self.nfq[q.get_fd()] = q
349 def sendp(self, data, iface):
350 """ Send a raw packet using a layer-2 socket
353 if isinstance(data, BasePacket):
356 self.l2socket.bind((iface, ETH_P_ALL))
357 count = self.l2socket.send(data)
360 logging.warn("Truncated send on %s (%d/%d bytes sent)",
363 def build_config(self):
367 for path in glob.glob(os.path.join(self.data_path, "*")):
370 def get_ifindex(self, iface):
371 """ Get the interface index from sysfs
374 path = os.path.abspath(os.path.join(SYSFS_NET, iface, "ifindex"))
375 if not path.startswith(SYSFS_NET):
382 except EnvironmentError:
383 logging.debug("%s is probably down, removing", iface)
384 self.remove_iface(iface)
389 ifindex = f.readline().strip()
391 ifindex = int(ifindex)
392 except ValueError, e:
393 logging.warn("Failed to get ifindex for %s, cannot parse sysfs"
394 " output '%s'", iface, ifindex)
395 except EnvironmentError, e:
396 logging.warn("Error reading %s's ifindex from sysfs: %s",
398 self.remove_iface(iface)
405 def get_iface_hw_addr(self, iface):
406 """ Get the interface hardware address from sysfs
409 path = os.path.abspath(os.path.join(SYSFS_NET, iface, "address"))
410 if not path.startswith(SYSFS_NET):
416 except EnvironmentError:
417 logging.debug("%s is probably down, removing", iface)
418 self.remove_iface(iface)
422 addr = f.readline().strip()
423 except EnvironmentError, e:
424 logging.warn("Failed to read hw address for %s from sysfs: %s",
431 def add_iface(self, path):
432 """ Add an interface to monitor
435 iface = os.path.basename(path)
437 logging.debug("Updating configuration for %s", iface)
438 binding = parse_binding_file(path)
439 ifindex = self.get_ifindex(iface)
442 logging.warn("Stale configuration for %s found", iface)
444 if binding.is_valid():
445 binding.iface = iface
446 self.clients[binding.mac] = binding
447 self.subnets[binding.link] = parse_routing_table(binding.link)
448 logging.debug("Added client %s on %s", binding.hostname, iface)
449 self.ifaces[ifindex] = iface
450 self.v6nets[iface] = parse_routing_table(binding.link, 6)
452 def remove_iface(self, iface):
453 """ Cleanup clients on a removed interface
456 if iface in self.v6nets:
457 del self.v6nets[iface]
459 for mac in self.clients.keys():
460 if self.clients[mac].iface == iface:
461 del self.clients[mac]
463 for ifindex in self.ifaces.keys():
464 if self.ifaces[ifindex] == iface:
465 del self.ifaces[ifindex]
467 logging.debug("Removed interface %s", iface)
469 def dhcp_response(self, i, payload): # pylint: disable=W0613,R0914
470 """ Generate a reply to a BOOTP/DHCP request
473 # Decode the response - NFQUEUE relays IP packets
474 pkt = IP(payload.get_data())
476 # Get the actual interface from the ifindex
477 iface = self.ifaces[payload.get_indev()]
479 # Signal the kernel that it shouldn't further process the packet
480 payload.set_verdict(nfqueue.NF_DROP)
482 # Get the client MAC address
483 resp = pkt.getlayer(BOOTP).copy()
485 mac = resp.chaddr[:hlen].encode("hex")
486 mac, _ = re.subn(r'([0-9a-fA-F]{2})', r'\1:', mac, hlen-1)
488 # Server responses are always BOOTREPLYs
489 resp.op = "BOOTREPLY"
493 binding = self.clients[mac]
495 logging.warn("Invalid client %s on %s", mac, iface)
498 if iface != binding.iface:
499 logging.warn("Received spoofed DHCP request for %s from interface"
500 " %s instead of %s", mac, iface, binding.iface)
503 resp = Ether(dst=mac, src=self.get_iface_hw_addr(iface))/\
504 IP(src=DHCP_DUMMY_SERVER_IP, dst=binding.ip)/\
505 UDP(sport=pkt.dport, dport=pkt.sport)/resp
506 subnet = self.subnets[binding.link]
509 logging.warn("Invalid request from %s on %s, no DHCP"
510 " payload found", binding.mac, iface)
514 requested_addr = binding.ip
515 for opt in pkt[DHCP].options:
516 if type(opt) is tuple and opt[0] == "message-type":
518 if type(opt) is tuple and opt[0] == "requested_addr":
519 requested_addr = opt[1]
521 logging.info("%s from %s on %s", DHCP_TYPES.get(req_type, "UNKNOWN"),
524 if req_type == DHCPREQUEST and requested_addr != binding.ip:
526 logging.info("Sending DHCPNAK to %s on %s: requested %s"
527 " instead of %s", binding.mac, iface, requested_addr,
530 elif req_type in (DHCPDISCOVER, DHCPREQUEST):
531 resp_type = DHCP_REQRESP[req_type]
532 resp.yiaddr = self.clients[mac].ip
534 ("hostname", binding.hostname),
535 ("domain", binding.hostname.split('.', 1)[-1]),
536 ("router", subnet.gw),
537 ("broadcast_address", str(subnet.broadcast)),
538 ("subnet_mask", str(subnet.netmask)),
539 ("renewal_time", self.lease_renewal),
540 ("lease_time", self.lease_lifetime),
542 dhcp_options += [("name_server", x) for x in self.dhcp_nameservers]
544 elif req_type == DHCPINFORM:
545 resp_type = DHCP_REQRESP[req_type]
547 ("hostname", binding.hostname),
548 ("domain", binding.hostname.split('.', 1)[-1]),
550 dhcp_options += [("name_server", x) for x in self.dhcp_nameservers]
552 elif req_type == DHCPRELEASE:
554 logging.info("DHCPRELEASE from %s on %s", binding.mac, iface)
557 # Finally, always add the server identifier and end options
559 ("message-type", resp_type),
560 ("server_id", DHCP_DUMMY_SERVER_IP),
563 resp /= DHCP(options=dhcp_options)
565 logging.info("%s to %s (%s) on %s", DHCP_TYPES[resp_type], mac,
567 self.sendp(resp, iface)
569 def rs_response(self, i, payload): # pylint: disable=W0613
570 """ Generate a reply to a BOOTP/DHCP request
573 # Get the actual interface from the ifindex
574 iface = self.ifaces[payload.get_indev()]
575 ifmac = self.get_iface_hw_addr(iface)
576 subnet = self.v6nets[iface]
577 ifll = subnet.make_ll64(ifmac)
579 # Signal the kernel that it shouldn't further process the packet
580 payload.set_verdict(nfqueue.NF_DROP)
582 resp = Ether(src=self.get_iface_hw_addr(iface))/\
583 IPv6(src=str(ifll))/ICMPv6ND_RA(routerlifetime=14400)/\
584 ICMPv6NDOptPrefixInfo(prefix=str(subnet.prefix),
585 prefixlen=subnet.prefixlen)
587 if self.ipv6_nameservers:
588 resp /= ICMPv6NDOptRDNSS(dns=self.ipv6_nameservers,
589 lifetime=self.ra_period * 3)
591 logging.info("RA on %s for %s", iface, subnet.net)
592 self.sendp(resp, iface)
594 def ns_response(self, i, payload): # pylint: disable=W0613
595 """ Generate a reply to an ICMPv6 neighbor solicitation
598 # Get the actual interface from the ifindex
599 iface = self.ifaces[payload.get_indev()]
600 ifmac = self.get_iface_hw_addr(iface)
601 subnet = self.v6nets[iface]
602 ifll = subnet.make_ll64(ifmac)
604 ns = IPv6(payload.get_data())
606 if not (subnet.net.overlaps(ns.tgt) or str(ns.tgt) == str(ifll)):
607 logging.debug("Received NS for a non-routable IP (%s)", ns.tgt)
608 payload.set_verdict(nfqueue.NF_ACCEPT)
611 payload.set_verdict(nfqueue.NF_DROP)
614 client_lladdr = ns.lladdr
615 except AttributeError:
618 resp = Ether(src=ifmac, dst=client_lladdr)/\
619 IPv6(src=str(ifll), dst=ns.src)/\
620 ICMPv6ND_NA(R=1, O=0, S=1, tgt=ns.tgt)/\
621 ICMPv6NDOptDstLLAddr(lladdr=ifmac)
623 logging.info("NA on %s for %s", iface, ns.tgt)
624 self.sendp(resp, iface)
627 def send_periodic_ra(self):
628 # Use a separate thread as this may take a _long_ time with
629 # many interfaces and we want to be responsive in the mean time
630 threading.Thread(target=self._send_periodic_ra).start()
632 def _send_periodic_ra(self):
633 logging.debug("Sending out periodic RAs")
636 for client in self.clients.values():
638 ifmac = self.get_iface_hw_addr(iface)
642 subnet = self.v6nets[iface]
643 ifll = subnet.make_ll64(ifmac)
644 resp = Ether(src=ifmac)/\
645 IPv6(src=str(ifll))/ICMPv6ND_RA(routerlifetime=14400)/\
646 ICMPv6NDOptPrefixInfo(prefix=str(subnet.prefix),
647 prefixlen=subnet.prefixlen)
648 if self.ipv6_nameservers:
649 resp /= ICMPv6NDOptRDNSS(dns=self.ipv6_nameservers,
650 lifetime=self.ra_period * 3)
652 self.sendp(resp, iface)
653 except socket.error, e:
654 logging.warn("Periodic RA on %s failed: %s", iface, str(e))
656 logging.warn("Unkown error during periodic RA on %s: %s",
659 logging.debug("Sent %d RAs in %.2f seconds", i, time.time() - start)
662 """ Loop forever, serving DHCP requests
667 # Yes, we are accessing _fd directly, but it's the only way to have a
668 # single select() loop ;-)
669 iwfd = self.notifier._fd # pylint: disable=W0212
672 if self.ipv6_enabled:
673 timeout = self.ra_period
674 self.send_periodic_ra()
679 rlist, _, xlist = select(self.nfq.keys() + [iwfd], [], [], timeout)
681 logging.warn("Warning: Exception on %s",
682 ", ".join([ str(fd) for fd in xlist]))
686 # First check if there are any inotify (= configuration change)
688 self.notifier.read_events()
689 self.notifier.process_events()
694 self.nfq[fd].process_pending()
695 except RuntimeError, e:
696 logging.warn("Error processing fd %d: %s", fd, str(e))
698 logging.warn("Unknown error processing fd %d: %s",
701 if self.ipv6_enabled:
702 # Calculate the new timeout
703 timeout = self.ra_period - (time.time() - start)
707 self.send_periodic_ra()
708 timeout = self.ra_period - (time.time() - start)
711 if __name__ == "__main__":
714 from cStringIO import StringIO
715 from pwd import getpwnam, getpwuid
716 from configobj import ConfigObj, ConfigObjError, flatten_errors
720 validator = validate.Validator()
722 def is_ip_list(value, family=4):
726 raise validate.VdtParamError(family)
727 if isinstance(value, (str, unicode)):
729 if not isinstance(value, list):
730 raise validate.VdtTypeError(value)
736 raise validate.VdtValueError(entry)
738 if ip.version() != family:
739 raise validate.VdtValueError(entry)
742 validator.functions["ip_addr_list"] = is_ip_list
743 config_spec = StringIO(CONFIG_SPEC)
746 parser = optparse.OptionParser()
747 parser.add_option("-c", "--config", dest="config_file",
748 help="The location of the data files", metavar="FILE",
749 default=DEFAULT_CONFIG)
750 parser.add_option("-d", "--debug", action="store_true", dest="debug",
751 help="Turn on debugging messages")
752 parser.add_option("-f", "--foreground", action="store_false",
753 dest="daemonize", default=True,
754 help="Do not daemonize, stay in the foreground")
757 opts, args = parser.parse_args()
760 d = daemon.DaemonContext()
765 config = ConfigObj(opts.config_file, configspec=config_spec)
766 except ConfigObjError, err:
767 sys.stderr.write("Failed to parse config file %s: %s" %
768 (opts.config_file, str(err)))
771 results = config.validate(validator)
773 logging.fatal("Configuration file validation failed! See errors below:")
774 for (section_list, key, unused) in flatten_errors(config, results):
776 logging.fatal(" '%s' in section '%s' failed validation",
777 key, ", ".join(section_list))
779 logging.fatal(" Section '%s' is missing",
780 ", ".join(section_list))
783 pidfile = open(config["general"]["pidfile"], "w")
784 pidfile.write("%s" % os.getpid())
787 logger = logging.getLogger()
789 logger.setLevel(logging.DEBUG)
791 logger.setLevel(logging.INFO)
793 logging.info("Starting up")
796 if config["dhcp"].as_bool("enable_dhcp"):
798 "dhcp_queue_num": config["dhcp"].as_int("dhcp_queue"),
799 "dhcp_lease_lifetime": config["dhcp"].as_int("lease_lifetime"),
800 "dhcp_lease_renewal": config["dhcp"].as_int("lease_renewal"),
801 "dhcp_server_ip": config["dhcp"]["server_ip"],
802 "dhcp_nameservers": config["dhcp"]["nameservers"],
805 if config["ipv6"].as_bool("enable_ipv6"):
807 "rs_queue_num": config["ipv6"].as_int("rs_queue"),
808 "ns_queue_num": config["ipv6"].as_int("ns_queue"),
809 "ra_period": config["ipv6"].as_int("ra_period"),
810 "ipv6_nameservers": config["ipv6"]["nameservers"],
813 # pylint: disable=W0142
814 proxy = VMNetProxy(data_path=config["general"]["datapath"], **proxy_opts)
816 # Drop all capabilities except CAP_NET_RAW and change uid
818 uid = getpwuid(config["general"].as_int("user"))
820 uid = getpwnam(config["general"]["user"])
822 logging.debug("Setting capabilities and changing uid")
823 logging.debug("User: %s, uid: %d, gid: %d",
824 config["general"]["user"], uid.pw_uid, uid.pw_gid)
826 # Keep only the capabilities we need
827 # CAP_NET_ADMIN: we need to send nfqueue packet verdicts to a netlinkgroup
828 capng.capng_clear(capng.CAPNG_SELECT_BOTH)
829 capng.capng_update(capng.CAPNG_ADD,
830 capng.CAPNG_EFFECTIVE|capng.CAPNG_PERMITTED,
832 capng.capng_change_id(uid.pw_uid, uid.pw_gid,
833 capng.CAPNG_DROP_SUPP_GRP|capng.CAPNG_CLEAR_BOUNDING)
836 logfile = os.path.join(config["general"]["logdir"], LOG_FILENAME)
837 handler = logging.handlers.RotatingFileHandler(logfile,
840 handler = logging.StreamHandler()
842 handler.setFormatter(logging.Formatter(LOG_FORMAT))
843 logger.addHandler(handler)
845 logging.info("Ready to serve requests")
849 # vim: set ts=4 sts=4 sw=4 et :