# nfdhcpd: A promiscuous, NFQUEUE-based DHCP server for virtual machine hosting
5 # Copyright (c) 2010 GRNET SA
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 import logging.handlers
38 from select import select
39 from socket import AF_INET, AF_INET6
41 from scapy.data import ETH_P_ALL
42 from scapy.packet import BasePacket
43 from scapy.layers.l2 import Ether
44 from scapy.layers.inet import IP, UDP
45 from scapy.layers.inet6 import IPv6, ICMPv6ND_RA, ICMPv6ND_NA, \
46 ICMPv6NDOptDstLLAddr, \
47 ICMPv6NDOptPrefixInfo, \
49 from scapy.layers.dhcp import BOOTP, DHCP
51 DEFAULT_CONFIG = "/etc/nfdhcpd/nfdhcpd.conf"
52 DEFAULT_PATH = "/var/run/ganeti-dhcpd"
53 DEFAULT_USER = "nobody"
54 DEFAULT_LEASE_LIFETIME = 604800 # 1 week
55 DEFAULT_LEASE_RENEWAL = 600 # 10 min
56 DEFAULT_RA_PERIOD = 300 # seconds
57 DHCP_DUMMY_SERVER_IP = "1.2.3.4"
59 LOG_FILENAME = "nfdhcpd.log"
61 SYSFS_NET = "/sys/class/net"
63 LOG_FORMAT = "%(asctime)-15s %(levelname)-6s %(message)s"
65 # Configuration file specification (see configobj documentation)
74 enable_dhcp = boolean(default=True)
75 lease_lifetime = integer(min=0, max=4294967295)
76 lease_renewal = integer(min=0, max=4294967295)
78 dhcp_queue = integer(min=0, max=65535)
79 nameservers = ip_addr_list(family=4)
82 enable_ipv6 = boolean(default=True)
83 ra_period = integer(min=1, max=4294967295)
84 rs_queue = integer(min=0, max=65535)
85 ns_queue = integer(min=0, max=65535)
86 nameservers = ip_addr_list(family=6)
100 DHCPDISCOVER: "DHCPDISCOVER",
101 DHCPOFFER: "DHCPOFFER",
102 DHCPREQUEST: "DHCPREQUEST",
103 DHCPDECLINE: "DHCPDECLINE",
106 DHCPRELEASE: "DHCPRELEASE",
107 DHCPINFORM: "DHCPINFORM",
111 DHCPDISCOVER: DHCPOFFER,
112 DHCPREQUEST: DHCPACK,
117 def parse_routing_table(table="main", family=4):
118 """ Parse the given routing table to get connected route, gateway and
122 ipro = subprocess.Popen(["ip", "-%d" % family, "ro", "ls",
123 "table", table], stdout=subprocess.PIPE)
124 routes = ipro.stdout.readlines()
131 match = re.match(r'^default.*via ([^\s]+).*dev ([^\s]+)', route)
133 def_gw, def_dev = match.groups()
137 # Find the least-specific connected route
138 m = re.match("^([^\\s]+) dev %s" % def_dev, route)
144 def_net = IPy.IP(def_net)
145 except ValueError, e:
146 logging.warn("Unable to parse default route entry %s: %s",
149 return Subnet(net=def_net, gw=def_gw, dev=def_dev)
def parse_binding_file(path):
    """ Read a client configuration from a tap file

    The file is scanned line by line for entries starting with ``IP=``,
    ``MAC=``, ``LINK=`` and ``HOSTNAME=``. The ``IP=`` value is split on
    whitespace into a list of addresses; the other values are kept as
    plain strings. Lines with any other prefix are ignored.

    Returns a Client built from the entries that were found (absent ones
    are left as None), or None if the file could not be opened.

    """
    try:
        iffile = open(path, 'r')
    except EnvironmentError as e:
        logging.warning("Unable to open binding file %s: %s", path, str(e))
        return None

    mac = None
    ips = None
    link = None
    hostname = None

    # Close the file as soon as parsing is done instead of leaving the
    # descriptor open until the object happens to be collected.
    with iffile:
        for line in iffile:
            # Split on the first "=" only, so a value that itself contains
            # "=" is kept whole rather than truncated.
            value = line.strip().split("=", 1)[-1]
            if line.startswith("IP="):
                ips = value.split()
            elif line.startswith("MAC="):
                mac = value
            elif line.startswith("LINK="):
                link = value
            elif line.startswith("HOSTNAME="):
                hostname = value

    return Client(mac=mac, ips=ips, link=link, hostname=hostname)
class ClientFileHandler(pyinotify.ProcessEvent):
    """ Forward inotify events on binding files to the owning server

    """
    def __init__(self, server):
        pyinotify.ProcessEvent.__init__(self)
        # The object whose add_iface()/remove_iface() get called
        self.server = server

    def process_IN_DELETE(self, event):  # pylint: disable=C0103
        """ Handle the deletion of a file

        The deleted file's name is passed to the server as the name of
        the interface to forget.

        """
        iface = event.name
        self.server.remove_iface(iface)

    def process_IN_CLOSE_WRITE(self, event):  # pylint: disable=C0103
        """ Handle a file that was closed after being written

        The full path of the file is handed to the server so that it can
        (re)load the interface described by it.

        """
        binding_path = os.path.join(event.path, event.name)
        self.server.add_iface(binding_path)
class Client(object):
    """ The network identity of a single client

    Holds the MAC address, the list of IP addresses, the link and the
    hostname of a client, plus the interface it has been bound to.

    """
    def __init__(self, mac=None, ips=None, link=None, hostname=None):
        self.mac = mac
        self.ips = ips
        self.hostname = hostname
        self.link = link
        # Set by the code that registers the client on an interface
        self.iface = None

    @property
    def ip(self):
        """ Return the primary (first) IP address of the client

        """
        return self.ips[0]

    def is_valid(self):
        """ Tell whether the client has enough data to be served

        A client needs a MAC address, at least one IP address and a
        hostname. An empty address list is rejected as well as a missing
        one, since the primary address could not be resolved later on.

        """
        return (self.mac is not None and bool(self.ips)
                and self.hostname is not None)
class Subnet(object):
    """ A network along with its gateway and device

    """
    def __init__(self, net=None, gw=None, dev=None):
        # Textual networks are parsed into IPy.IP objects, anything else
        # is stored as given
        self.net = IPy.IP(net) if isinstance(net, str) else net
        self.gw = gw
        self.dev = dev

    @property
    def netmask(self):
        """ The netmask of the network, as a string

        """
        mask = self.net.netmask()
        return str(mask)

    @property
    def broadcast(self):
        """ The broadcast address of the network, as a string

        """
        bcast = self.net.broadcast()
        return str(bcast)

    @property
    def prefix(self):
        """ The network address, as an IPy.IP

        """
        return self.net.net()

    @property
    def prefixlen(self):
        """ The length of the network prefix, as an integer

        """
        return self.net.prefixlen()

    @staticmethod
    def _make_eui64(net, mac):
        """ Build an IPv6 address in net out of an EUI-48 (MAC) address

        The first four groups of the result come from the network; the
        last four are the MAC address with ff:fe inserted in the middle
        and bit 0x02 of its first octet flipped.

        """
        octets = mac.split(":")
        groups = IPy.IP(net).net().strFullsize().split(":")[:4]

        ident = octets[:3] + ["ff", "fe"] + octets[3:]
        flipped = int(ident[0], 16) ^ 0x02
        ident[0] = "%02x" % flipped

        # Glue the octets together two by two to form 16-bit groups
        groups.extend(["".join(ident[pos:pos + 2])
                       for pos in range(0, len(ident), 2)])
        return IPy.IP(":".join(groups))

    def make_eui64(self, mac):
        """ Build the EUI-64 based address of mac inside this subnet

        """
        return self._make_eui64(self.net, mac)

    def make_ll64(self, mac):
        """ Build the link-local (fe80::) EUI-64 based address of mac

        """
        return self._make_eui64("fe80::", mac)
284 class VMNetProxy(object): # pylint: disable=R0902
def __init__(self, data_path, dhcp_queue_num=None,  # pylint: disable=R0913
             rs_queue_num=None, ns_queue_num=None,
             dhcp_lease_lifetime=DEFAULT_LEASE_LIFETIME,
             dhcp_lease_renewal=DEFAULT_LEASE_RENEWAL,
             dhcp_server_ip=DHCP_DUMMY_SERVER_IP, dhcp_nameservers=None,
             ra_period=DEFAULT_RA_PERIOD, ipv6_nameservers=None):
    """ Set up the sockets, inotify watches and netfilter queues

    data_path is the directory watched (recursively) for binding files.
    Each of dhcp_queue_num, rs_queue_num and ns_queue_num enables the
    corresponding handler when it is not None; binding either of the
    two IPv6 queues also marks IPv6 as enabled. The lease, server IP,
    RA period and nameserver arguments are stored for later use by the
    handlers; nameserver lists default to empty lists.

    """
    self.data_path = data_path
    self.lease_lifetime = dhcp_lease_lifetime
    self.lease_renewal = dhcp_lease_renewal
    self.dhcp_server_ip = dhcp_server_ip
    self.ra_period = ra_period
    # The attribute must be spelled dhcp_nameservers in both branches:
    # the DHCP handler reads it under that name when building options.
    if dhcp_nameservers is None:
        self.dhcp_nameservers = []
    else:
        self.dhcp_nameservers = dhcp_nameservers

    if ipv6_nameservers is None:
        self.ipv6_nameservers = []
    else:
        self.ipv6_nameservers = ipv6_nameservers

    self.ipv6_enabled = False

    # Lookup tables filled in as interfaces come and go
    self.clients = {}   # MAC address -> client binding
    self.subnets = {}   # link -> IPv4 subnet
    self.ifaces = {}    # ifindex -> interface name
    self.v6nets = {}    # interface name -> IPv6 subnet
    self.nfq = {}       # queue file descriptor -> queue
    # NOTE(review): the receive buffer is set to 0, presumably because
    # this socket is only ever used for sending - confirm.
    self.l2socket = socket.socket(socket.AF_PACKET,
                                  socket.SOCK_RAW, ETH_P_ALL)
    self.l2socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 0)

    # Inotify setup
    self.wm = pyinotify.WatchManager()
    mask = pyinotify.EventsCodes.ALL_FLAGS["IN_DELETE"]
    mask |= pyinotify.EventsCodes.ALL_FLAGS["IN_CLOSE_WRITE"]
    inotify_handler = ClientFileHandler(self)
    self.notifier = pyinotify.Notifier(self.wm, inotify_handler)
    self.wm.add_watch(self.data_path, mask, rec=True)

    # NFQUEUE setup
    if dhcp_queue_num is not None:
        self._setup_nfqueue(dhcp_queue_num, AF_INET, self.dhcp_response)

    if rs_queue_num is not None:
        self._setup_nfqueue(rs_queue_num, AF_INET6, self.rs_response)
        self.ipv6_enabled = True

    if ns_queue_num is not None:
        self._setup_nfqueue(ns_queue_num, AF_INET6, self.ns_response)
        self.ipv6_enabled = True
339 """ Free all resources for a graceful exit
342 logging.info("Cleaning up")
344 logging.debug("Closing netfilter queues")
345 for q in self.nfq.values():
348 logging.debug("Closing socket")
349 self.l2socket.close()
351 logging.debug("Stopping inotify watches")
354 logging.info("Cleanup finished")
def _setup_nfqueue(self, queue_num, family, callback):
    """ Open a netfilter queue and register it with this proxy

    The queue is bound to queue_num for the given address family, with
    callback installed as its packet handler, and is stored in self.nfq
    under its file descriptor.

    """
    logging.debug("Setting up NFQUEUE for queue %d, AF %s",
                  queue_num, family)
    queue = nfqueue.queue()
    queue.set_callback(callback)
    queue.fast_open(queue_num, family)
    queue.set_queue_maxlen(5000)
    # This is mandatory for the queue to operate
    queue.set_mode(nfqueue.NFQNL_COPY_PACKET)
    fd = queue.get_fd()
    self.nfq[fd] = queue
def sendp(self, data, iface):
    """ Transmit data on iface through the layer-2 socket

    Packet objects are converted to their string form first; anything
    else is sent as given. A warning is logged when fewer bytes were
    sent than requested.

    """
    raw = str(data) if isinstance(data, BasePacket) else data

    # The shared socket is re-bound to the target interface on each call
    self.l2socket.bind((iface, ETH_P_ALL))
    sent = self.l2socket.send(raw)
    total = len(raw)
    if sent == total:
        return
    logging.warn("Truncated send on %s (%d/%d bytes sent)",
                 iface, sent, total)
381 def build_config(self):
385 for path in glob.glob(os.path.join(self.data_path, "*")):
388 def get_ifindex(self, iface):
389 """ Get the interface index from sysfs
392 path = os.path.abspath(os.path.join(SYSFS_NET, iface, "ifindex"))
393 if not path.startswith(SYSFS_NET):
400 except EnvironmentError:
401 logging.debug("%s is probably down, removing", iface)
402 self.remove_iface(iface)
407 ifindex = f.readline().strip()
409 ifindex = int(ifindex)
410 except ValueError, e:
411 logging.warn("Failed to get ifindex for %s, cannot parse sysfs"
412 " output '%s'", iface, ifindex)
413 except EnvironmentError, e:
414 logging.warn("Error reading %s's ifindex from sysfs: %s",
416 self.remove_iface(iface)
423 def get_iface_hw_addr(self, iface):
424 """ Get the interface hardware address from sysfs
427 path = os.path.abspath(os.path.join(SYSFS_NET, iface, "address"))
428 if not path.startswith(SYSFS_NET):
434 except EnvironmentError:
435 logging.debug("%s is probably down, removing", iface)
436 self.remove_iface(iface)
440 addr = f.readline().strip()
441 except EnvironmentError, e:
442 logging.warn("Failed to read hw address for %s from sysfs: %s",
449 def add_iface(self, path):
450 """ Add an interface to monitor
453 iface = os.path.basename(path)
455 logging.debug("Updating configuration for %s", iface)
456 binding = parse_binding_file(path)
459 ifindex = self.get_ifindex(iface)
462 logging.warn("Stale configuration for %s found", iface)
464 if binding.is_valid():
465 binding.iface = iface
466 self.clients[binding.mac] = binding
467 self.subnets[binding.link] = parse_routing_table(binding.link)
468 logging.debug("Added client %s on %s", binding.hostname, iface)
469 self.ifaces[ifindex] = iface
470 self.v6nets[iface] = parse_routing_table(binding.link, 6)
def remove_iface(self, iface):
    """ Cleanup clients on a removed interface

    Drops the IPv6 network of iface, every client bound to it and every
    ifindex entry that maps to it. Unknown interfaces are not an error.

    """
    self.v6nets.pop(iface, None)

    # Iterate over snapshots: entries are deleted while looping, which
    # is only safe on the live dict where keys() returns a copy.
    for mac, client in list(self.clients.items()):
        if client.iface == iface:
            del self.clients[mac]

    for ifindex, name in list(self.ifaces.items()):
        if name == iface:
            del self.ifaces[ifindex]

    logging.debug("Removed interface %s", iface)
489 def dhcp_response(self, i, payload): # pylint: disable=W0613,R0914
490 """ Generate a reply to a BOOTP/DHCP request
493 # Decode the response - NFQUEUE relays IP packets
494 pkt = IP(payload.get_data())
496 # Get the actual interface from the ifindex
497 iface = self.ifaces[payload.get_indev()]
499 # Signal the kernel that it shouldn't further process the packet
500 payload.set_verdict(nfqueue.NF_DROP)
502 # Get the client MAC address
503 resp = pkt.getlayer(BOOTP).copy()
505 mac = resp.chaddr[:hlen].encode("hex")
506 mac, _ = re.subn(r'([0-9a-fA-F]{2})', r'\1:', mac, hlen-1)
508 # Server responses are always BOOTREPLYs
509 resp.op = "BOOTREPLY"
513 binding = self.clients[mac]
515 logging.warn("Invalid client %s on %s", mac, iface)
518 if iface != binding.iface:
519 logging.warn("Received spoofed DHCP request for %s from interface"
520 " %s instead of %s", mac, iface, binding.iface)
523 resp = Ether(dst=mac, src=self.get_iface_hw_addr(iface))/\
524 IP(src=DHCP_DUMMY_SERVER_IP, dst=binding.ip)/\
525 UDP(sport=pkt.dport, dport=pkt.sport)/resp
526 subnet = self.subnets[binding.link]
529 logging.warn("Invalid request from %s on %s, no DHCP"
530 " payload found", binding.mac, iface)
534 requested_addr = binding.ip
535 for opt in pkt[DHCP].options:
536 if type(opt) is tuple and opt[0] == "message-type":
538 if type(opt) is tuple and opt[0] == "requested_addr":
539 requested_addr = opt[1]
541 logging.info("%s from %s on %s", DHCP_TYPES.get(req_type, "UNKNOWN"),
544 if req_type == DHCPREQUEST and requested_addr != binding.ip:
546 logging.info("Sending DHCPNAK to %s on %s: requested %s"
547 " instead of %s", binding.mac, iface, requested_addr,
550 elif req_type in (DHCPDISCOVER, DHCPREQUEST):
551 resp_type = DHCP_REQRESP[req_type]
552 resp.yiaddr = self.clients[mac].ip
554 ("hostname", binding.hostname),
555 ("domain", binding.hostname.split('.', 1)[-1]),
556 ("router", subnet.gw),
557 ("broadcast_address", str(subnet.broadcast)),
558 ("subnet_mask", str(subnet.netmask)),
559 ("renewal_time", self.lease_renewal),
560 ("lease_time", self.lease_lifetime),
562 dhcp_options += [("name_server", x) for x in self.dhcp_nameservers]
564 elif req_type == DHCPINFORM:
565 resp_type = DHCP_REQRESP[req_type]
567 ("hostname", binding.hostname),
568 ("domain", binding.hostname.split('.', 1)[-1]),
570 dhcp_options += [("name_server", x) for x in self.dhcp_nameservers]
572 elif req_type == DHCPRELEASE:
574 logging.info("DHCPRELEASE from %s on %s", binding.mac, iface)
577 # Finally, always add the server identifier and end options
579 ("message-type", resp_type),
580 ("server_id", DHCP_DUMMY_SERVER_IP),
583 resp /= DHCP(options=dhcp_options)
585 logging.info("%s to %s (%s) on %s", DHCP_TYPES[resp_type], mac,
587 self.sendp(resp, iface)
def rs_response(self, i, payload):  # pylint: disable=W0613
    """ Generate a router advertisement for a queued IPv6 packet

    Callback of the queue set up with rs_queue_num. The queued packet is
    dropped and a router advertisement carrying the prefix of the
    incoming interface's IPv6 network (plus the configured IPv6
    nameservers, if any) is sent out of that interface.

    """
    # Get the actual interface from the ifindex
    iface = self.ifaces[payload.get_indev()]
    ifmac = self.get_iface_hw_addr(iface)
    subnet = self.v6nets[iface]
    ifll = subnet.make_ll64(ifmac)

    # Signal the kernel that it shouldn't further process the packet
    payload.set_verdict(nfqueue.NF_DROP)

    # Reuse the hardware address read above instead of querying sysfs a
    # second time for the same interface
    resp = Ether(src=ifmac)/\
           IPv6(src=str(ifll))/ICMPv6ND_RA(routerlifetime=14400)/\
           ICMPv6NDOptPrefixInfo(prefix=str(subnet.prefix),
                                 prefixlen=subnet.prefixlen)

    if self.ipv6_nameservers:
        resp /= ICMPv6NDOptRDNSS(dns=self.ipv6_nameservers,
                                 lifetime=self.ra_period * 3)

    logging.info("RA on %s for %s", iface, subnet.net)
    self.sendp(resp, iface)
614 def ns_response(self, i, payload): # pylint: disable=W0613
615 """ Generate a reply to an ICMPv6 neighbor solicitation
618 # Get the actual interface from the ifindex
619 iface = self.ifaces[payload.get_indev()]
620 ifmac = self.get_iface_hw_addr(iface)
621 subnet = self.v6nets[iface]
622 ifll = subnet.make_ll64(ifmac)
624 ns = IPv6(payload.get_data())
626 if not (subnet.net.overlaps(ns.tgt) or str(ns.tgt) == str(ifll)):
627 logging.debug("Received NS for a non-routable IP (%s)", ns.tgt)
628 payload.set_verdict(nfqueue.NF_ACCEPT)
631 payload.set_verdict(nfqueue.NF_DROP)
634 client_lladdr = ns.lladdr
635 except AttributeError:
638 resp = Ether(src=ifmac, dst=client_lladdr)/\
639 IPv6(src=str(ifll), dst=ns.src)/\
640 ICMPv6ND_NA(R=1, O=0, S=1, tgt=ns.tgt)/\
641 ICMPv6NDOptDstLLAddr(lladdr=ifmac)
643 logging.info("NA on %s for %s", iface, ns.tgt)
644 self.sendp(resp, iface)
def send_periodic_ra(self):
    """ Start the periodic RA run in a thread of its own

    """
    # Sending to many interfaces may take a _long_ time; doing it in a
    # separate thread keeps the caller responsive in the meantime
    worker = threading.Thread(target=self._send_periodic_ra)
    worker.start()
652 def _send_periodic_ra(self):
653 logging.debug("Sending out periodic RAs")
656 for client in self.clients.values():
658 ifmac = self.get_iface_hw_addr(iface)
662 subnet = self.v6nets[iface]
663 ifll = subnet.make_ll64(ifmac)
664 resp = Ether(src=ifmac)/\
665 IPv6(src=str(ifll))/ICMPv6ND_RA(routerlifetime=14400)/\
666 ICMPv6NDOptPrefixInfo(prefix=str(subnet.prefix),
667 prefixlen=subnet.prefixlen)
668 if self.ipv6_nameservers:
669 resp /= ICMPv6NDOptRDNSS(dns=self.ipv6_nameservers,
670 lifetime=self.ra_period * 3)
672 self.sendp(resp, iface)
673 except socket.error, e:
674 logging.warn("Periodic RA on %s failed: %s", iface, str(e))
676 logging.warn("Unkown error during periodic RA on %s: %s",
679 logging.debug("Sent %d RAs in %.2f seconds", i, time.time() - start)
682 """ Safely perform the main loop, freeing all resources upon exit
691 """ Loop forever, serving DHCP requests
696 # Yes, we are accessing _fd directly, but it's the only way to have a
697 # single select() loop ;-)
698 iwfd = self.notifier._fd # pylint: disable=W0212
701 if self.ipv6_enabled:
702 timeout = self.ra_period
703 self.send_periodic_ra()
708 rlist, _, xlist = select(self.nfq.keys() + [iwfd], [], [], timeout)
710 logging.warn("Warning: Exception on %s",
711 ", ".join([ str(fd) for fd in xlist]))
715 # First check if there are any inotify (= configuration change)
717 self.notifier.read_events()
718 self.notifier.process_events()
723 self.nfq[fd].process_pending()
724 except RuntimeError, e:
725 logging.warn("Error processing fd %d: %s", fd, str(e))
727 logging.warn("Unknown error processing fd %d: %s",
730 if self.ipv6_enabled:
731 # Calculate the new timeout
732 timeout = self.ra_period - (time.time() - start)
736 self.send_periodic_ra()
737 timeout = self.ra_period - (time.time() - start)
740 if __name__ == "__main__":
743 from cStringIO import StringIO
744 from pwd import getpwnam, getpwuid
745 from configobj import ConfigObj, ConfigObjError, flatten_errors
749 validator = validate.Validator()
751 def is_ip_list(value, family=4):
755 raise validate.VdtParamError(family)
756 if isinstance(value, (str, unicode)):
758 if not isinstance(value, list):
759 raise validate.VdtTypeError(value)
765 raise validate.VdtValueError(entry)
767 if ip.version() != family:
768 raise validate.VdtValueError(entry)
771 validator.functions["ip_addr_list"] = is_ip_list
772 config_spec = StringIO(CONFIG_SPEC)
775 parser = optparse.OptionParser()
776 parser.add_option("-c", "--config", dest="config_file",
777 help="The location of the data files", metavar="FILE",
778 default=DEFAULT_CONFIG)
779 parser.add_option("-d", "--debug", action="store_true", dest="debug",
780 help="Turn on debugging messages")
781 parser.add_option("-f", "--foreground", action="store_false",
782 dest="daemonize", default=True,
783 help="Do not daemonize, stay in the foreground")
786 opts, args = parser.parse_args()
789 d = daemon.DaemonContext()
794 config = ConfigObj(opts.config_file, configspec=config_spec)
795 except ConfigObjError, err:
796 sys.stderr.write("Failed to parse config file %s: %s" %
797 (opts.config_file, str(err)))
800 results = config.validate(validator)
802 logging.fatal("Configuration file validation failed! See errors below:")
803 for (section_list, key, unused) in flatten_errors(config, results):
805 logging.fatal(" '%s' in section '%s' failed validation",
806 key, ", ".join(section_list))
808 logging.fatal(" Section '%s' is missing",
809 ", ".join(section_list))
812 pidfile = open(config["general"]["pidfile"], "w")
813 pidfile.write("%s" % os.getpid())
816 logger = logging.getLogger()
818 logger.setLevel(logging.DEBUG)
820 logger.setLevel(logging.INFO)
822 logging.info("Starting up")
825 if config["dhcp"].as_bool("enable_dhcp"):
827 "dhcp_queue_num": config["dhcp"].as_int("dhcp_queue"),
828 "dhcp_lease_lifetime": config["dhcp"].as_int("lease_lifetime"),
829 "dhcp_lease_renewal": config["dhcp"].as_int("lease_renewal"),
830 "dhcp_server_ip": config["dhcp"]["server_ip"],
831 "dhcp_nameservers": config["dhcp"]["nameservers"],
834 if config["ipv6"].as_bool("enable_ipv6"):
836 "rs_queue_num": config["ipv6"].as_int("rs_queue"),
837 "ns_queue_num": config["ipv6"].as_int("ns_queue"),
838 "ra_period": config["ipv6"].as_int("ra_period"),
839 "ipv6_nameservers": config["ipv6"]["nameservers"],
842 # pylint: disable=W0142
843 proxy = VMNetProxy(data_path=config["general"]["datapath"], **proxy_opts)
845 # Drop all capabilities except CAP_NET_RAW and change uid
847 uid = getpwuid(config["general"].as_int("user"))
849 uid = getpwnam(config["general"]["user"])
851 logging.debug("Setting capabilities and changing uid")
852 logging.debug("User: %s, uid: %d, gid: %d",
853 config["general"]["user"], uid.pw_uid, uid.pw_gid)
855 # Keep only the capabilities we need
856 # CAP_NET_ADMIN: we need to send nfqueue packet verdicts to a netlinkgroup
857 capng.capng_clear(capng.CAPNG_SELECT_BOTH)
858 capng.capng_update(capng.CAPNG_ADD,
859 capng.CAPNG_EFFECTIVE|capng.CAPNG_PERMITTED,
861 capng.capng_change_id(uid.pw_uid, uid.pw_gid,
862 capng.CAPNG_DROP_SUPP_GRP|capng.CAPNG_CLEAR_BOUNDING)
865 logfile = os.path.join(config["general"]["logdir"], LOG_FILENAME)
866 handler = logging.handlers.RotatingFileHandler(logfile,
869 handler = logging.StreamHandler()
871 handler.setFormatter(logging.Formatter(LOG_FORMAT))
872 logger.addHandler(handler)
874 logging.info("Ready to serve requests")
878 # vim: set ts=4 sts=4 sw=4 et :