Gracefully handle dead interfaces on periodic RA
[snf-nfdhcpd] / nfdhcpd
1 #!/usr/bin/env python
2 #
3
# nfdhcpd: A promiscuous, NFQUEUE-based DHCP server for virtual machine hosting
5 # Copyright (c) 2010 GRNET SA
6 #
7 #    This program is free software; you can redistribute it and/or modify
8 #    it under the terms of the GNU General Public License as published by
9 #    the Free Software Foundation; either version 2 of the License, or
10 #    (at your option) any later version.
11 #
12 #    This program is distributed in the hope that it will be useful,
13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 #    GNU General Public License for more details.
16 #
17 #    You should have received a copy of the GNU General Public License along
18 #    with this program; if not, write to the Free Software Foundation, Inc.,
19 #    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #
21
22 import os
23 import re
24 import glob
25 import time
26 import logging
27 import logging.handlers
28 import subprocess
29
30 import daemon
31 import nfqueue
32 import pyinotify
33
34 import IPy
35 from select import select
36 from socket import AF_INET, AF_INET6
37
38 from scapy.layers.l2 import Ether
39 from scapy.layers.inet import IP, UDP
40 from scapy.layers.inet6 import *
41 from scapy.layers.dhcp import BOOTP, DHCP
42 from scapy.sendrecv import sendp
43
# Default for the --path option: the directory holding the per-interface
# binding files, which VMNetProxy watches through inotify.
DEFAULT_PATH = "/var/run/ganeti-dhcpd"
# Default NFQUEUE number (used as the default of --dhcp-queue and --rs-queue)
DEFAULT_NFQUEUE_NUM = 42
# Default for the --user option: the account the daemon switches to
DEFAULT_USER = "nobody"
# Values sent in the "lease_time" / "renewal_time" DHCP options
DEFAULT_LEASE_TIME = 604800 # 1 week
DEFAULT_RENEWAL_TIME = 600  # 10 min

# Log file used when the process is daemonized
LOG_FILENAME = "/var/log/nfdhcpd/nfdhcpd.log"

# Root of the sysfs tree read for an interface's "ifindex" and "address"
SYSFS_NET = "/sys/class/net"
# Source IP and "server_id" option used in every DHCP reply
DHCP_DUMMY_SERVER_IP = "1.2.3.4"

LOG_FORMAT = "%(asctime)-15s %(levelname)-6s %(message)s"
# Interval between rounds of unsolicited router advertisements
PERIODIC_RA_TIMEOUT = 30 # seconds

# DHCP message type codes, compared against the "message-type" DHCP option
DHCPDISCOVER = 1
DHCPOFFER = 2
DHCPREQUEST = 3
DHCPDECLINE = 4
DHCPACK = 5
DHCPNAK = 6
DHCPRELEASE = 7
DHCPINFORM = 8

# Message type code -> printable name, used in log messages
DHCP_TYPES = {
    DHCPDISCOVER: "DHCPDISCOVER",
    DHCPOFFER: "DHCPOFFER",
    DHCPREQUEST: "DHCPREQUEST",
    DHCPDECLINE: "DHCPDECLINE",
    DHCPACK: "DHCPACK",
    DHCPNAK: "DHCPNAK",
    DHCPRELEASE: "DHCPRELEASE",
    DHCPINFORM: "DHCPINFORM",
}

# Request type -> type of the (positive) response sent for it
DHCP_REQRESP = {
    DHCPDISCOVER: DHCPOFFER,
    DHCPREQUEST: DHCPACK,
    DHCPINFORM: DHCPACK,
    }
83
class ClientFileHandler(pyinotify.ProcessEvent):
    """ Forward inotify events on binding files to the owning server

    """
    def __init__(self, server):
        pyinotify.ProcessEvent.__init__(self)
        self.server = server

    def process_IN_DELETE(self, event):
        """ A binding file disappeared: drop the interface named after it

        """
        removed_name = event.name
        self.server.remove_iface(removed_name)

    def process_IN_CLOSE_WRITE(self, event):
        """ A binding file was written and closed: (re)load it

        """
        binding_path = os.path.join(event.path, event.name)
        self.server.add_iface(binding_path)
94
95
class Client(object):
    """ A client binding: MAC address, IP addresses, link and hostname

    `iface` is not a constructor argument; it starts as None and is meant
    to be filled in by whoever attaches the client to an interface.

    """
    def __init__(self, mac=None, ips=None, link=None, hostname=None):
        self.mac = mac
        self.ips = ips
        self.hostname = hostname
        self.link = link
        self.iface = None

    @property
    def ip(self):
        """ The primary address, i.e. the first entry of `ips`

        Only meaningful for a client for which is_valid() is true.

        """
        return self.ips[0]

    def is_valid(self):
        """ Tell whether the binding is complete enough to be served

        A binding needs a MAC, a hostname and at least one IP. An empty IP
        list is rejected as well, since `ip` would otherwise raise
        IndexError the first time the client is served.

        """
        return (self.mac is not None and bool(self.ips)
                and self.hostname is not None)
111
112
class Subnet(object):
    """ A network together with its gateway and outgoing device

    `net` may be given as a string, in which case it is converted with
    IPy.IP(); any other value is stored as is.

    """
    def __init__(self, net=None, gw=None, dev=None):
        self.net = IPy.IP(net) if isinstance(net, str) else net
        self.gw = gw
        self.dev = dev

    @property
    def netmask(self):
        """ The network mask, as a string """
        mask = self.net.netmask()
        return str(mask)

    @property
    def broadcast(self):
        """ The broadcast address, as a string """
        bcast = self.net.broadcast()
        return str(bcast)

    @property
    def prefix(self):
        """ The network address, as returned by net.net() """
        return self.net.net()

    @property
    def prefixlen(self):
        """ The prefix length, as returned by net.prefixlen() """
        return self.net.prefixlen()

    @staticmethod
    def _make_eui64(net, mac):
        """ Compute an EUI-64 address from an EUI-48 (MAC) address

        The first four groups of the fully expanded network address are
        kept and the interface identifier derived from `mac` is appended.

        """
        octets = mac.split(":")
        groups = IPy.IP(net).net().strFullsize().split(":")[:4]
        # Insert ff:fe in the middle of the MAC and flip bit 0x02 of the
        # first octet
        ident = octets[:3] + ["ff", "fe"] + octets[3:]
        ident[0] = "%02x" % (int(ident[0], 16) ^ 0x02)
        # Glue the octets together two by two to form 16-bit groups
        groups.extend("".join(ident[pos:pos + 2])
                      for pos in range(0, len(ident), 2))
        return IPy.IP(":".join(groups))

    def make_eui64(self, mac):
        """ EUI-64 address for `mac` inside this subnet """
        return self._make_eui64(self.net, mac)

    def make_ll64(self, mac):
        """ EUI-64 address for `mac` under the fe80:: prefix """
        return self._make_eui64("fe80::", mac)
156
157
158 class VMNetProxy(object):
159     def __init__(self, data_path, dhcp_queue_num=None,
160                  rs_queue_num=None, ns_queue_num=None):
161         self.data_path = data_path
162         self.clients = {}
163         self.subnets = {}
164         self.ifaces = {}
165         self.v6nets = {}
166         self.nfq = {}
167
168         # Inotify setup
169         self.wm = pyinotify.WatchManager()
170         mask = pyinotify.EventsCodes.ALL_FLAGS["IN_DELETE"]
171         mask |= pyinotify.EventsCodes.ALL_FLAGS["IN_CLOSE_WRITE"]
172         handler = ClientFileHandler(self)
173         self.notifier = pyinotify.Notifier(self.wm, handler)
174         self.wm.add_watch(self.data_path, mask, rec=True)
175
176         # NFQUEUE setup
177         if dhcp_queue_num is not None:
178             self._setup_nfqueue(dhcp_queue_num, AF_INET, self.dhcp_response)
179
180         if rs_queue_num is not None:
181             self._setup_nfqueue(rs_queue_num, AF_INET6, self.rs_response)
182
183         if ns_queue_num is not None:
184             self._setup_nfqueue(ns_queue_num, AF_INET6, self.ns_response)
185
186     def _setup_nfqueue(self, queue_num, family, callback):
187         logging.debug("Setting up NFQUEUE for queue %d, AF %s" %
188                       (queue_num, family))
189         q = nfqueue.queue()
190         q.set_callback(callback)
191         q.fast_open(queue_num, family)
192         q.set_queue_maxlen(5000)
193         # This is mandatory for the queue to operate
194         q.set_mode(nfqueue.NFQNL_COPY_PACKET)
195         self.nfq[q.get_fd()] = q
196
197     def build_config(self):
198         self.clients.clear()
199         self.subnets.clear()
200
201         for file in glob.glob(os.path.join(self.data_path, "*")):
202             self.add_iface(file)
203
204     def get_ifindex(self, iface):
205         """ Get the interface index from sysfs
206
207         """
208         file = os.path.abspath(os.path.join(SYSFS_NET, iface, "ifindex"))
209         if not file.startswith(SYSFS_NET):
210             return None
211
212         ifindex = None
213
214         try:
215             f = open(file, 'r')
216             ifindex = int(f.readline().strip())
217             f.close()
218         except IOError:
219             logging.debug("%s is down, removing" % iface)
220             self.remove_iface(iface)
221
222         return ifindex
223
224
225     def get_iface_hw_addr(self, iface):
226         """ Get the interface hardware address from sysfs
227
228         """
229         file = os.path.abspath(os.path.join(SYSFS_NET, iface, "address"))
230         if not file.startswith(SYSFS_NET):
231             return None
232
233         addr = None
234         try:
235             f = open(file, 'r')
236             addr = f.readline().strip()
237             f.close()
238         except IOError:
239             logging.debug("%s is down, removing" % iface)
240             self.remove_iface(iface)
241
242         return addr
243
244     def parse_routing_table(self, table="main", family=4):
245         """ Parse the given routing table to get connected route, gateway and
246         default device.
247
248         """
249         ipro = subprocess.Popen(["ip", "-%d" % family, "ro", "ls",
250                                  "table", table], stdout=subprocess.PIPE)
251         routes = ipro.stdout.readlines()
252
253         def_gw = None
254         def_dev = None
255         def_net = None
256
257         for route in routes:
258             match = re.match(r'^default.*via ([^\s]+).*dev ([^\s]+)', route)
259             if match:
260                 def_gw, def_dev = match.groups()
261                 break
262
263         for route in routes:
264             # Find the least-specific connected route
265             try:
266                 def_net = re.match("^([^\\s]+) dev %s" %
267                                    def_dev, route).groups()[0]
268                 def_net = IPy.IP(def_net)
269             except:
270                 pass
271
272         return Subnet(net=def_net, gw=def_gw, dev=def_dev)
273
274     def parse_binding_file(self, path):
275         """ Read a client configuration from a tap file
276
277         """
278         try:
279             iffile = open(path, 'r')
280         except:
281             return (None, None, None, None)
282         mac = None
283         ips = None
284         link = None
285         hostname = None
286
287         for line in iffile:
288             if line.startswith("IP="):
289                 ip = line.strip().split("=")[1]
290                 ips = ip.split()
291             elif line.startswith("MAC="):
292                 mac = line.strip().split("=")[1]
293             elif line.startswith("LINK="):
294                 link = line.strip().split("=")[1]
295             elif line.startswith("HOSTNAME="):
296                 hostname = line.strip().split("=")[1]
297
298         return Client(mac=mac, ips=ips, link=link, hostname=hostname)
299
300     def add_iface(self, path):
301         """ Add an interface to monitor
302
303         """
304         iface = os.path.basename(path)
305
306         logging.debug("Updating configuration for %s" % iface)
307         binding = self.parse_binding_file(path)
308         ifindex = self.get_ifindex(iface)
309
310         if ifindex is None:
311             logging.warn("Stale configuration for %s found" % iface)
312         else:
313             if binding.is_valid():
314                 binding.iface = iface
315                 self.clients[binding.mac] = binding
316                 self.subnets[binding.link] = self.parse_routing_table(
317                                                 binding.link)
318                 logging.debug("Added client %s on %s" %
319                               (binding.hostname, iface))
320                 self.ifaces[ifindex] = iface
321                 self.v6nets[iface] = self.parse_routing_table(binding.link, 6)
322
323     def remove_iface(self, iface):
324         """ Cleanup clients on a removed interface
325
326         """
327         if iface in self.v6nets:
328             del self.v6nets[iface]
329
330         for mac in self.clients.keys():
331             if self.clients[mac].iface == iface:
332                 del self.clients[mac]
333
334         for ifindex in self.ifaces.keys():
335             if self.ifaces[ifindex] == iface:
336                 del self.ifaces[ifindex]
337
338         logging.debug("Removed interface %s" % iface)
339
340     def dhcp_response(self, i, payload):
341         """ Generate a reply to a BOOTP/DHCP request
342
343         """
344         # Decode the response - NFQUEUE relays IP packets
345         pkt = IP(payload.get_data())
346
347         # Get the actual interface from the ifindex
348         iface = self.ifaces[payload.get_indev()]
349
350         # Signal the kernel that it shouldn't further process the packet
351         payload.set_verdict(nfqueue.NF_DROP)
352
353         # Get the client MAC address
354         resp = pkt.getlayer(BOOTP).copy()
355         hlen = resp.hlen
356         mac = resp.chaddr[:hlen].encode("hex")
357         mac, _ = re.subn(r'([0-9a-fA-F]{2})', r'\1:', mac, hlen-1)
358
359         # Server responses are always BOOTREPLYs
360         resp.op = "BOOTREPLY"
361         del resp.payload
362
363         try:
364             binding = self.clients[mac]
365         except KeyError:
366             logging.warn("Invalid client %s on %s" % (mac, iface))
367             return
368
369         if iface != binding.iface:
370             logging.warn("Received spoofed DHCP request for %s from interface"
371                          " %s instead of %s" %
372                          (mac, iface, binding.iface))
373             return
374
375         resp = Ether(dst=mac, src=self.get_iface_hw_addr(iface))/\
376                IP(src=DHCP_DUMMY_SERVER_IP, dst=binding.ip)/\
377                UDP(sport=pkt.dport, dport=pkt.sport)/resp
378         subnet = self.subnets[binding.link]
379
380         if not DHCP in pkt:
381             logging.warn("Invalid request from %s on %s, no DHCP"
382                          " payload found" % (binding.mac, iface))
383             return
384
385         dhcp_options = []
386         requested_addr = binding.ip
387         for opt in pkt[DHCP].options:
388             if type(opt) is tuple and opt[0] == "message-type":
389                 req_type = opt[1]
390             if type(opt) is tuple and opt[0] == "requested_addr":
391                 requested_addr = opt[1]
392
393         logging.info("%s from %s on %s" %
394                     (DHCP_TYPES.get(req_type, "UNKNOWN"), binding.mac, iface))
395
396         if req_type == DHCPREQUEST and requested_addr != binding.ip:
397             resp_type = DHCPNAK
398             logging.info("Sending DHCPNAK to %s on %s: requested %s"
399                          " instead of %s" %
400                          (binding.mac, iface, requested_addr, binding.ip))
401
402         elif req_type in (DHCPDISCOVER, DHCPREQUEST):
403             resp_type = DHCP_REQRESP[req_type]
404             resp.yiaddr = self.clients[mac].ip
405             dhcp_options += [
406                  ("hostname", binding.hostname),
407                  ("domain", binding.hostname.split('.', 1)[-1]),
408                  ("router", subnet.gw),
409                  ("name_server", "194.177.210.10"),
410                  ("name_server", "194.177.210.211"),
411                  ("broadcast_address", str(subnet.broadcast)),
412                  ("subnet_mask", str(subnet.netmask)),
413                  ("renewal_time", DEFAULT_RENEWAL_TIME),
414                  ("lease_time", DEFAULT_LEASE_TIME),
415             ]
416
417         elif req_type == DHCPINFORM:
418             resp_type = DHCP_REQRESP[req_type]
419             dhcp_options += [
420                  ("hostname", binding.hostname),
421                  ("domain", binding.hostname.split('.', 1)[-1]),
422                  ("name_server", "194.177.210.10"),
423                  ("name_server", "194.177.210.211"),
424             ]
425
426         elif req_type == DHCPRELEASE:
427             # Log and ignore
428             logging.info("DHCPRELEASE from %s on %s" %
429                          (binding.mac, iface))
430             return
431
432         # Finally, always add the server identifier and end options
433         dhcp_options += [
434             ("message-type", resp_type),
435             ("server_id", DHCP_DUMMY_SERVER_IP),
436             "end"
437         ]
438         resp /= DHCP(options=dhcp_options)
439
440         logging.info("%s to %s (%s) on %s" %
441                       (DHCP_TYPES[resp_type], mac, binding.ip, iface))
442         sendp(resp, iface=iface, verbose=False)
443
444     def rs_response(self, i, payload):
445         """ Generate a reply to a BOOTP/DHCP request
446
447         """
448         # Get the actual interface from the ifindex
449         iface = self.ifaces[payload.get_indev()]
450         ifmac = self.get_iface_hw_addr(iface)
451         subnet = self.v6nets[iface]
452         ifll = subnet.make_ll64(ifmac)
453
454         # Signal the kernel that it shouldn't further process the packet
455         payload.set_verdict(nfqueue.NF_DROP)
456
457         resp = Ether(src=self.get_iface_hw_addr(iface))/\
458                IPv6(src=str(ifll))/ICMPv6ND_RA(routerlifetime=14400)/\
459                ICMPv6NDOptPrefixInfo(prefix=str(subnet.prefix),
460                                      prefixlen=subnet.prefixlen)
461
462         logging.info("RA on %s for %s" % (iface, subnet.net))
463         sendp(resp, iface=iface, verbose=False)
464
465     def ns_response(self, i, payload):
466         """ Generate a reply to an ICMPv6 neighbor solicitation
467
468         """
469         # Get the actual interface from the ifindex
470         iface = self.ifaces[payload.get_indev()]
471         ifmac = self.get_iface_hw_addr(iface)
472         subnet = self.v6nets[iface]
473         ifll = subnet.make_ll64(ifmac)
474
475         ns = IPv6(payload.get_data())
476
477         if not (subnet.net.overlaps(ns.tgt) or str(ns.tgt) == str(ifll)):
478             logging.debug("Received NS for a non-routable IP (%s)" % ns.tgt)
479             payload.set_verdict(nfqueue.NF_ACCEPT)
480             return 1
481
482         payload.set_verdict(nfqueue.NF_DROP)
483
484         resp = Ether(src=ifmac, dst=ns.lladdr)/\
485                IPv6(src=str(ifll), dst=ns.src)/\
486                ICMPv6ND_NA(R=1, O=0, S=1, tgt=ns.tgt)/\
487                ICMPv6NDOptDstLLAddr(lladdr=ifmac)
488
489         logging.info("NA on %s for %s" % (iface, ns.tgt))
490         sendp(resp, iface=iface, verbose=False)
491         return 1
492
493     def send_periodic_ra(self):
494         logging.debug("Sending out periodic RAs")
495         start = time.time()
496         i = 0
497         for client in self.clients.values():
498             iface = client.iface
499             ifmac = self.get_iface_hw_addr(iface)
500             if not ifmac:
501                 continue
502
503             subnet = self.v6nets[iface]
504             ifll = subnet.make_ll64(ifmac)
505             resp = Ether(src=ifmac)/\
506                    IPv6(src=str(ifll))/ICMPv6ND_RA(routerlifetime=14400)/\
507                    ICMPv6NDOptPrefixInfo(prefix=str(subnet.prefix),
508                                          prefixlen=subnet.prefixlen)
509             try:
510                 sendp(resp, iface=iface, verbose=False)
511             except:
512                 logging.debug("Periodic RA on %s failed" % iface)
513             i += 1
514         logging.debug("Sent %d RAs in %.2f seconds" % (i, time.time() - start))
515
516     def serve(self):
517         """ Loop forever, serving DHCP requests
518
519         """
520         self.build_config()
521
522         iwfd = self.notifier._fd
523
524         start = time.time()
525         timeout = PERIODIC_RA_TIMEOUT
526         self.send_periodic_ra()
527
528         while True:
529             rlist, _, xlist = select(self.nfq.keys() + [iwfd], [], [], timeout)
530             # First check if there are any inotify (= configuration change)
531             # events
532             if not (rlist or xlist):
533                 # We were woken up by a timeout
534                 start = time.time()
535                 self.send_periodic_ra()
536
537             else:
538                 if iwfd in rlist:
539                     self.notifier.read_events()
540                     self.notifier.process_events()
541                     rlist.remove(iwfd)
542
543                 for fd in rlist:
544                     self.nfq[fd].process_pending()
545
546             # Calculate the new timeout
547             timeout = PERIODIC_RA_TIMEOUT - (time.time() - start)
548
549             # Just to be safe we won't miss anything
550             if timeout <= 0:
551                 logging.debug("Send extra RAs")
552                 self.send_periodic_ra()
553                 timeout = PERIODIC_RA_TIMEOUT
554
555
556
if __name__ == "__main__":
    # Command-line entry point: parse options, optionally daemonize, set up
    # logging, bind the queues as root, then drop privileges and serve.
    import optparse
    from capng import *
    from pwd import getpwnam, getpwuid

    # Default queue for neighbor solicitations, named so that the help
    # text and the actual default cannot drift apart
    DEFAULT_NS_QUEUE_NUM = 44

    parser = optparse.OptionParser()
    parser.add_option("-p", "--path", dest="data_path",
                      help="The location of the data files", metavar="DIR",
                      default=DEFAULT_PATH)
    parser.add_option("-c", "--dhcp-queue", dest="dhcp_queue",
                      help="The nfqueue to receive DHCP requests from"
                           " (default: %d)" % DEFAULT_NFQUEUE_NUM, type="int",
                      metavar="NUM", default=DEFAULT_NFQUEUE_NUM)
    # NOTE(review): this default is the same queue number as --dhcp-queue;
    # confirm whether a distinct default was intended
    parser.add_option("-r", "--rs-queue", dest="rs_queue",
                      help="The nfqueue to receive IPv6 router"
                           " solicitations from (default: %d)" %
                           DEFAULT_NFQUEUE_NUM, type="int",
                      metavar="NUM", default=DEFAULT_NFQUEUE_NUM)
    parser.add_option("-n", "--ns-queue", dest="ns_queue",
                      help="The nfqueue to receive IPv6 neighbor"
                           " solicitations from (default: %d)" %
                           DEFAULT_NS_QUEUE_NUM, type="int",
                      metavar="NUM", default=DEFAULT_NS_QUEUE_NUM)
    parser.add_option("-u", "--user", dest="user",
                      help="An unprivileged user to run as",
                      metavar="UID", default=DEFAULT_USER)
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      help="Turn on debugging messages")
    parser.add_option("-f", "--foreground", action="store_false",
                      dest="daemonize", default=True,
                      help="Do not daemonize, stay in the foreground")

    opts, args = parser.parse_args()

    if opts.daemonize:
        d = daemon.DaemonContext()
        d.open()

    # Written after daemonizing, so that it holds the final pid
    with open("/var/run/nfdhcpd.pid", "w") as pidfile:
        pidfile.write("%s" % os.getpid())

    logger = logging.getLogger()
    if opts.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    if opts.daemonize:
        handler = logging.handlers.RotatingFileHandler(LOG_FILENAME,
                                                       maxBytes=2097152)
    else:
        handler = logging.StreamHandler()

    handler.setFormatter(logging.Formatter(LOG_FORMAT))
    logger.addHandler(handler)

    logging.info("Starting up")
    proxy = VMNetProxy(opts.data_path, opts.dhcp_queue,
                       opts.rs_queue, opts.ns_queue)

    # Drop all capabilities except CAP_NET_RAW and change uid.
    # --user may be given either as a numeric uid or as a user name.
    try:
        uid = getpwuid(int(opts.user))
    except ValueError:
        uid = getpwnam(opts.user)

    logging.info("Setting capabilities and changing uid")
    logging.debug("User: %s, uid: %d, gid: %d" %
                  (opts.user, uid.pw_uid, uid.pw_gid))
    capng_clear(CAPNG_SELECT_BOTH)
    capng_update(CAPNG_ADD, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_NET_RAW)
    capng_change_id(uid.pw_uid, uid.pw_gid,
                    CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING)
    logging.info("Ready to serve requests")
    proxy.serve()
633
634
635 # vim: set ts=4 sts=4 sw=4 et :