Refactor the main loop code and increase RA period
[snf-nfdhcpd] / nfdhcpd
1 #!/usr/bin/env python
2 #
3
4 # nfdhcpd: A promiscuous, NFQUEUE-based DHCP server for virtual machine hosting
5 # Copyright (c) 2010 GRNET SA
6 #
7 #    This program is free software; you can redistribute it and/or modify
8 #    it under the terms of the GNU General Public License as published by
9 #    the Free Software Foundation; either version 2 of the License, or
10 #    (at your option) any later version.
11 #
12 #    This program is distributed in the hope that it will be useful,
13 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 #    GNU General Public License for more details.
16 #
17 #    You should have received a copy of the GNU General Public License along
18 #    with this program; if not, write to the Free Software Foundation, Inc.,
19 #    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #
21
22 import os
23 import re
24 import glob
25 import time
26 import logging
27 import logging.handlers
28 import threading
29 import subprocess
30
31 import daemon
32 import nfqueue
33 import pyinotify
34
35 import IPy
36 from select import select
37 from socket import AF_INET, AF_INET6
38
39 from scapy.layers.l2 import Ether
40 from scapy.layers.inet import IP, UDP
41 from scapy.layers.inet6 import *
42 from scapy.layers.dhcp import BOOTP, DHCP
43 from scapy.sendrecv import sendp
44
# Defaults for the command-line options parsed in the __main__ section.
DEFAULT_PATH = "/var/run/ganeti-dhcpd"  # directory holding the data files
DEFAULT_NFQUEUE_NUM = 42  # default for --dhcp-queue and --rs-queue
DEFAULT_USER = "nobody"  # unprivileged user the process switches to
# Values sent in the "lease_time" / "renewal_time" DHCP options.
DEFAULT_LEASE_TIME = 604800 # 1 week (seconds)
DEFAULT_RENEWAL_TIME = 600  # 10 min (seconds)

# Log file used by the rotating file handler when running daemonized.
LOG_FILENAME = "/var/log/nfdhcpd/nfdhcpd.log"

# sysfs directory from which the per-interface "ifindex" and "address"
# files are read.
SYSFS_NET = "/sys/class/net"
# Used as the source IP and as the "server_id" option of DHCP replies.
DHCP_DUMMY_SERVER_IP = "1.2.3.4"

LOG_FORMAT = "%(asctime)-15s %(levelname)-6s %(message)s"
# Interval between two rounds of periodic router advertisements sent from
# the main loop.
PERIODIC_RA_TIMEOUT = 300 # seconds

# DHCP message-type codes, compared against the "message-type" option of
# incoming packets and used as the type of outgoing replies.
DHCPDISCOVER = 1
DHCPOFFER = 2
DHCPREQUEST = 3
DHCPDECLINE = 4
DHCPACK = 5
DHCPNAK = 6
DHCPRELEASE = 7
DHCPINFORM = 8

# Message-type code -> printable name, used in log messages.
DHCP_TYPES = {
    DHCPDISCOVER: "DHCPDISCOVER",
    DHCPOFFER: "DHCPOFFER",
    DHCPREQUEST: "DHCPREQUEST",
    DHCPDECLINE: "DHCPDECLINE",
    DHCPACK: "DHCPACK",
    DHCPNAK: "DHCPNAK",
    DHCPRELEASE: "DHCPRELEASE",
    DHCPINFORM: "DHCPINFORM",
}

# Request type -> type of the reply sent for it. Only the request types
# listed here are answered through this table.
DHCP_REQRESP = {
    DHCPDISCOVER: DHCPOFFER,
    DHCPREQUEST: DHCPACK,
    DHCPINFORM: DHCPACK,
    }
84
85
class ClientFileHandler(pyinotify.ProcessEvent):
    """ Relay inotify events on binding files to the owning server

    Deleted files are reported to server.remove_iface() by file name,
    files closed after writing are reported to server.add_iface() by
    full path.

    """
    def __init__(self, server):
        pyinotify.ProcessEvent.__init__(self)
        self.server = server

    def process_IN_DELETE(self, event):
        """ A binding file was removed: drop the matching interface

        """
        removed_name = event.name
        self.server.remove_iface(removed_name)

    def process_IN_CLOSE_WRITE(self, event):
        """ A binding file was (re)written: (re)load it

        """
        binding_path = os.path.join(event.path, event.name)
        self.server.add_iface(binding_path)
96
97
class Client(object):
    """ A client binding: MAC address, IP addresses, link and hostname

    The iface attribute starts out as None and is filled in by whoever
    registers the binding for a concrete interface.

    """
    def __init__(self, mac=None, ips=None, link=None, hostname=None):
        self.mac = mac
        self.ips = ips
        self.hostname = hostname
        self.link = link
        self.iface = None

    @property
    def ip(self):
        """ The first (primary) address of the client

        Only meaningful on a binding for which is_valid() is true;
        raises TypeError/IndexError when no address is configured.

        """
        return self.ips[0]

    def is_valid(self):
        """ Tell whether the binding carries everything needed to serve it

        A MAC address, a hostname and at least one IP address are
        required. An empty address list is rejected as well, because the
        ip property would otherwise fail on a binding reported as valid.

        """
        return (self.mac is not None and bool(self.ips)
                and self.hostname is not None)
113
114
class Subnet(object):
    """ A network together with its gateway and device

    The network is kept in the net attribute; when it is given as a
    string it is converted with IPy.IP, any other value is stored
    unchanged.

    """
    def __init__(self, net=None, gw=None, dev=None):
        self.net = IPy.IP(net) if isinstance(net, str) else net
        self.gw = gw
        self.dev = dev

    @property
    def netmask(self):
        """ The netmask of the network, as a string """
        mask = self.net.netmask()
        return str(mask)

    @property
    def broadcast(self):
        """ The broadcast address of the network, as a string """
        bcast = self.net.broadcast()
        return str(bcast)

    @property
    def prefix(self):
        """ The network address, as returned by the net() method of net """
        return self.net.net()

    @property
    def prefixlen(self):
        """ The prefix length of the network """
        return self.net.prefixlen()

    @staticmethod
    def _make_eui64(net, mac):
        """ Compute an EUI-64 address from an EUI-48 (MAC) address

        The first four groups of the fully expanded network address are
        used as the prefix; the interface identifier is the MAC address
        with ff:fe inserted in the middle and bit 0x02 of the first
        octet flipped.

        """
        head = IPy.IP(net).net().strFullsize().split(":")[:4]
        octets = mac.split(":")
        ident = octets[:3] + ["ff", "fe"] + octets[3:]
        ident[0] = "%02x" % (int(ident[0], 16) ^ 0x02)
        # Glue consecutive octets pairwise into 16-bit groups
        tail = ["".join(ident[pos:pos + 2])
                for pos in range(0, len(ident), 2)]
        return IPy.IP(":".join(head + tail))

    def make_eui64(self, mac):
        """ EUI-64 address for mac inside this subnet """
        return self._make_eui64(self.net, mac)

    def make_ll64(self, mac):
        """ EUI-64 address for mac under the fe80:: prefix """
        return self._make_eui64("fe80::", mac)
158
159
class VMNetProxy(object):
    """ Answer DHCP requests and IPv6 router/neighbor solicitations

    Packets arrive through NFQUEUE queues; client bindings are read from
    files under data_path (one file per interface, named after the
    interface) and are kept current through inotify events.

    """
    def __init__(self, data_path, dhcp_queue_num=None,
                 rs_queue_num=None, ns_queue_num=None):
        """ Set up the inotify watch and the requested NFQUEUE queues

        A queue number of None disables the corresponding service.

        """
        self.data_path = data_path
        self.clients = {}   # lower-case MAC address -> Client
        self.subnets = {}   # link (routing table name) -> IPv4 Subnet
        self.ifaces = {}    # interface index -> interface name
        self.v6nets = {}    # interface name -> IPv6 Subnet
        self.nfq = {}       # queue file descriptor -> nfqueue queue

        # Inotify setup
        self.wm = pyinotify.WatchManager()
        mask = pyinotify.EventsCodes.ALL_FLAGS["IN_DELETE"]
        mask |= pyinotify.EventsCodes.ALL_FLAGS["IN_CLOSE_WRITE"]
        handler = ClientFileHandler(self)
        self.notifier = pyinotify.Notifier(self.wm, handler)
        self.wm.add_watch(self.data_path, mask, rec=True)

        # NFQUEUE setup
        if dhcp_queue_num is not None:
            self._setup_nfqueue(dhcp_queue_num, AF_INET, self.dhcp_response)

        if rs_queue_num is not None:
            self._setup_nfqueue(rs_queue_num, AF_INET6, self.rs_response)

        if ns_queue_num is not None:
            self._setup_nfqueue(ns_queue_num, AF_INET6, self.ns_response)

    def _setup_nfqueue(self, queue_num, family, callback):
        """ Open queue queue_num for family and register callback on it

        The queue is stored in self.nfq under its file descriptor.

        """
        logging.debug("Setting up NFQUEUE for queue %d, AF %s" %
                      (queue_num, family))
        q = nfqueue.queue()
        q.set_callback(callback)
        q.fast_open(queue_num, family)
        q.set_queue_maxlen(5000)
        # This is mandatory for the queue to operate
        q.set_mode(nfqueue.NFQNL_COPY_PACKET)
        self.nfq[q.get_fd()] = q

    def build_config(self):
        """ Rebuild the whole configuration from the files in data_path

        """
        self.clients.clear()
        self.subnets.clear()
        # Interface maps are derived from the same files, reset them too
        self.ifaces.clear()
        self.v6nets.clear()

        for path in glob.glob(os.path.join(self.data_path, "*")):
            self.add_iface(path)

    def _read_sysfs(self, iface, attr):
        """ Return the stripped first line of a sysfs attribute of iface

        Returns None when the resulting path is not below SYSFS_NET or
        when the file cannot be read; in the latter case the interface
        is considered gone and is removed.

        """
        path = os.path.abspath(os.path.join(SYSFS_NET, iface, attr))
        # Compare against the directory including the separator, so that
        # names like "../network" cannot escape to a sibling directory.
        if not path.startswith(SYSFS_NET + os.sep):
            return None

        try:
            with open(path, 'r') as f:
                return f.readline().strip()
        except IOError:
            logging.debug("%s is down, removing" % iface)
            self.remove_iface(iface)
            return None

    def get_ifindex(self, iface):
        """ Get the interface index from sysfs

        Returns the index as an int, or None when it is unavailable.

        """
        value = self._read_sysfs(iface, "ifindex")
        if value is None:
            return None

        try:
            return int(value)
        except ValueError:
            logging.debug("Unexpected ifindex %r for %s" % (value, iface))
            return None

    def get_iface_hw_addr(self, iface):
        """ Get the interface hardware address from sysfs

        Returns the address as a string, or None when it is unavailable.

        """
        return self._read_sysfs(iface, "address")

    def parse_routing_table(self, table="main", family=4):
        """ Parse the given routing table to get connected route, gateway and
        default device.

        Returns a Subnet whose fields are None for everything that could
        not be determined.

        """
        ipro = subprocess.Popen(["ip", "-%d" % family, "ro", "ls",
                                 "table", table], stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() also waits for the child, so no zombie is left
        routes = ipro.communicate()[0].splitlines()

        def_gw = None
        def_dev = None
        def_net = None

        for route in routes:
            match = re.match(r'^default.*via ([^\s]+).*dev ([^\s]+)', route)
            if match:
                def_gw, def_dev = match.groups()
                break

        if def_dev is not None:
            # Routes of the form "<net> dev <default device> ..."; when
            # several are listed the last parseable one wins.
            connected = re.compile(r'^([^\s]+) dev %s(?:\s|$)' %
                                   re.escape(def_dev))
            for route in routes:
                match = connected.match(route)
                if match is None:
                    continue
                try:
                    def_net = IPy.IP(match.group(1))
                except ValueError:
                    # Not a network (e.g. "default"), keep what we have
                    continue

        return Subnet(net=def_net, gw=def_gw, dev=def_dev)

    def parse_binding_file(self, path):
        """ Read a client configuration from a tap file

        The file consists of KEY=value lines; IP, MAC, LINK and HOSTNAME
        are recognized. Always returns a Client; when the file cannot be
        opened the Client is empty and therefore not valid.

        """
        try:
            iffile = open(path, 'r')
        except (IOError, OSError):
            return Client()

        mac = None
        ips = None
        link = None
        hostname = None

        try:
            for line in iffile:
                key, sep, value = line.partition("=")
                if not sep:
                    continue
                value = value.strip()
                if key == "IP":
                    ips = value.split()
                elif key == "MAC":
                    mac = value
                elif key == "LINK":
                    link = value
                elif key == "HOSTNAME":
                    hostname = value
        finally:
            iffile.close()

        return Client(mac=mac, ips=ips, link=link, hostname=hostname)

    def add_iface(self, path):
        """ Add an interface to monitor

        path is the binding file; its base name is the interface name.

        """
        iface = os.path.basename(path)

        logging.debug("Updating configuration for %s" % iface)
        binding = self.parse_binding_file(path)
        ifindex = self.get_ifindex(iface)

        if ifindex is None:
            logging.warning("Stale configuration for %s found" % iface)
            return

        if not binding.is_valid():
            logging.debug("Incomplete configuration for %s, ignoring" % iface)
            return

        if binding.link is None:
            # The link names the routing table to inspect; without it
            # there is nothing to look up.
            logging.warning("No link configured for %s, ignoring" % iface)
            return

        binding.iface = iface
        # MAC addresses decoded from packets are lower-case hex, so store
        # the key the same way.
        self.clients[binding.mac.lower()] = binding
        self.subnets[binding.link] = self.parse_routing_table(binding.link)
        logging.debug("Added client %s on %s" % (binding.hostname, iface))
        self.ifaces[ifindex] = iface
        self.v6nets[iface] = self.parse_routing_table(binding.link, 6)

    def remove_iface(self, iface):
        """ Cleanup clients on a removed interface

        """
        self.v6nets.pop(iface, None)

        # Collect the keys first: entries are deleted while walking
        stale_macs = [mac for mac, client in self.clients.items()
                      if client.iface == iface]
        for mac in stale_macs:
            del self.clients[mac]

        stale_indexes = [ifindex for ifindex, name in self.ifaces.items()
                         if name == iface]
        for ifindex in stale_indexes:
            del self.ifaces[ifindex]

        logging.debug("Removed interface %s" % iface)

    def dhcp_response(self, i, payload):
        """ Generate a reply to a BOOTP/DHCP request

        NFQUEUE callback; i is unused. The packet is always dropped from
        the kernel's point of view, a reply (if any) is sent separately.

        """
        data = payload.get_data()
        indev = payload.get_indev()

        # Signal the kernel that it shouldn't further process the packet.
        # This is done before anything that may fail, so that a bad
        # packet can never be left without a verdict.
        payload.set_verdict(nfqueue.NF_DROP)

        # Get the actual interface from the ifindex
        iface = self.ifaces.get(indev)
        if iface is None:
            logging.warning("DHCP request from unknown interface index %s" %
                            indev)
            return

        # Decode the request - NFQUEUE relays IP packets
        pkt = IP(data)
        bootp = pkt.getlayer(BOOTP)
        if bootp is None:
            logging.warning("Invalid request on %s, no BOOTP payload found" %
                            iface)
            return

        # Get the client MAC address
        resp = bootp.copy()
        hlen = resp.hlen
        mac = ":".join(["%02x" % octet
                        for octet in bytearray(resp.chaddr[:hlen])])

        # Server responses are always BOOTREPLYs
        resp.op = "BOOTREPLY"
        del resp.payload

        try:
            binding = self.clients[mac]
        except KeyError:
            logging.warning("Invalid client %s on %s" % (mac, iface))
            return

        if iface != binding.iface:
            logging.warning("Received spoofed DHCP request for %s from"
                            " interface %s instead of %s" %
                            (mac, iface, binding.iface))
            return

        if not DHCP in pkt:
            logging.warning("Invalid request from %s on %s, no DHCP"
                            " payload found" % (binding.mac, iface))
            return

        subnet = self.subnets.get(binding.link)
        if subnet is None:
            logging.warning("No subnet known for link %s (%s on %s)" %
                            (binding.link, binding.mac, iface))
            return

        resp = Ether(dst=mac, src=self.get_iface_hw_addr(iface))/\
               IP(src=DHCP_DUMMY_SERVER_IP, dst=binding.ip)/\
               UDP(sport=pkt.dport, dport=pkt.sport)/resp

        dhcp_options = []
        req_type = None
        requested_addr = binding.ip
        for opt in pkt[DHCP].options:
            # Options are (name, value) tuples; markers such as "end"
            # are plain strings and are skipped.
            if not isinstance(opt, tuple):
                continue
            if opt[0] == "message-type":
                req_type = opt[1]
            elif opt[0] == "requested_addr":
                requested_addr = opt[1]

        if req_type is None:
            logging.warning("Invalid request from %s on %s, no DHCP"
                            " message type found" % (binding.mac, iface))
            return

        logging.info("%s from %s on %s" %
                    (DHCP_TYPES.get(req_type, "UNKNOWN"), binding.mac, iface))

        name_servers = [
            ("name_server", "194.177.210.10"),
            ("name_server", "194.177.210.211"),
        ]

        if req_type == DHCPREQUEST and requested_addr != binding.ip:
            resp_type = DHCPNAK
            logging.info("Sending DHCPNAK to %s on %s: requested %s"
                         " instead of %s" %
                         (binding.mac, iface, requested_addr, binding.ip))

        elif req_type in (DHCPDISCOVER, DHCPREQUEST):
            resp_type = DHCP_REQRESP[req_type]
            resp.yiaddr = binding.ip
            dhcp_options += [
                 ("hostname", binding.hostname),
                 ("domain", binding.hostname.split('.', 1)[-1]),
                 ("router", subnet.gw),
            ]
            dhcp_options += name_servers
            dhcp_options += [
                 ("broadcast_address", str(subnet.broadcast)),
                 ("subnet_mask", str(subnet.netmask)),
                 ("renewal_time", DEFAULT_RENEWAL_TIME),
                 ("lease_time", DEFAULT_LEASE_TIME),
            ]

        elif req_type == DHCPINFORM:
            resp_type = DHCP_REQRESP[req_type]
            dhcp_options += [
                 ("hostname", binding.hostname),
                 ("domain", binding.hostname.split('.', 1)[-1]),
            ]
            dhcp_options += name_servers

        else:
            # DHCPRELEASE and every other message type: the request has
            # been logged above and no reply is sent.
            return

        # Finally, always add the server identifier and end options
        dhcp_options += [
            ("message-type", resp_type),
            ("server_id", DHCP_DUMMY_SERVER_IP),
            "end"
        ]
        resp /= DHCP(options=dhcp_options)

        logging.info("%s to %s (%s) on %s" %
                      (DHCP_TYPES[resp_type], mac, binding.ip, iface))
        sendp(resp, iface=iface, verbose=False)

    def _build_ra(self, ifmac, subnet):
        """ Build a router advertisement for subnet, sent from ifmac

        The source address is the link-local EUI-64 address derived from
        ifmac; the advertised prefix is the one of subnet.

        """
        ifll = subnet.make_ll64(ifmac)
        return (Ether(src=ifmac) /
                IPv6(src=str(ifll)) / ICMPv6ND_RA(routerlifetime=14400) /
                ICMPv6NDOptPrefixInfo(prefix=str(subnet.prefix),
                                      prefixlen=subnet.prefixlen))

    def rs_response(self, i, payload):
        """ Generate a reply to an ICMPv6 router solicitation

        NFQUEUE callback; i is unused. The solicitation is always
        dropped, a router advertisement is sent when the interface and
        its IPv6 prefix are known.

        """
        indev = payload.get_indev()

        # Signal the kernel that it shouldn't further process the packet
        payload.set_verdict(nfqueue.NF_DROP)

        # Get the actual interface from the ifindex
        iface = self.ifaces.get(indev)
        if iface is None:
            logging.warning("RS from unknown interface index %s" % indev)
            return

        ifmac = self.get_iface_hw_addr(iface)
        subnet = self.v6nets.get(iface)
        if not ifmac or subnet is None or subnet.net is None:
            logging.debug("No IPv6 configuration for %s, not sending RA" %
                          iface)
            return

        resp = self._build_ra(ifmac, subnet)

        logging.info("RA on %s for %s" % (iface, subnet.net))
        sendp(resp, iface=iface, verbose=False)

    def ns_response(self, i, payload):
        """ Generate a reply to an ICMPv6 neighbor solicitation

        NFQUEUE callback; i is unused. Solicitations that cannot be
        answered here (unknown interface, no IPv6 prefix, target outside
        the prefix) are handed back to the kernel with NF_ACCEPT, all
        others are dropped and answered with a neighbor advertisement.

        """
        data = payload.get_data()

        # Get the actual interface from the ifindex
        iface = self.ifaces.get(payload.get_indev())
        if iface is None:
            payload.set_verdict(nfqueue.NF_ACCEPT)
            return 1

        ifmac = self.get_iface_hw_addr(iface)
        subnet = self.v6nets.get(iface)
        if not ifmac or subnet is None or subnet.net is None:
            logging.debug("No IPv6 configuration for %s, ignoring NS" % iface)
            payload.set_verdict(nfqueue.NF_ACCEPT)
            return 1

        ifll = subnet.make_ll64(ifmac)
        ns = IPv6(data)

        try:
            target = ns.tgt
        except AttributeError:
            # Not a neighbor solicitation after all
            payload.set_verdict(nfqueue.NF_ACCEPT)
            return 1

        if not (subnet.net.overlaps(target) or str(target) == str(ifll)):
            logging.debug("Received NS for a non-routable IP (%s)" % target)
            payload.set_verdict(nfqueue.NF_ACCEPT)
            return 1

        payload.set_verdict(nfqueue.NF_DROP)

        try:
            client_lladdr = ns.lladdr
        except AttributeError:
            # No link-layer address in the packet, cannot address a reply
            return 1

        resp = Ether(src=ifmac, dst=client_lladdr)/\
               IPv6(src=str(ifll), dst=ns.src)/\
               ICMPv6ND_NA(R=1, O=0, S=1, tgt=target)/\
               ICMPv6NDOptDstLLAddr(lladdr=ifmac)

        logging.info("NA on %s for %s" % (iface, target))
        sendp(resp, iface=iface, verbose=False)
        return 1

    def send_periodic_ra(self):
        """ Send router advertisements to all clients in the background

        """
        # Use a separate thread as this may take a _long_ time with
        # many interfaces and we want to be responsive in the mean time
        threading.Thread(target=self._send_periodic_ra).start()

    def _send_periodic_ra(self):
        """ Send one router advertisement per known client interface

        Interfaces without a hardware address or without an IPv6 prefix
        are skipped; a failure on one interface does not stop the round.

        """
        logging.debug("Sending out periodic RAs")
        start = time.time()
        sent = 0
        # Work on a snapshot: the main loop may change the configuration
        # while this thread is running.
        for client in list(self.clients.values()):
            iface = client.iface
            ifmac = self.get_iface_hw_addr(iface)
            if not ifmac:
                continue

            subnet = self.v6nets.get(iface)
            if subnet is None or subnet.net is None:
                continue

            try:
                sendp(self._build_ra(ifmac, subnet), iface=iface,
                      verbose=False)
            except Exception:
                logging.debug("Periodic RA on %s failed" % iface)
                continue
            sent += 1
        logging.debug("Sent %d RAs in %.2f seconds" %
                      (sent, time.time() - start))

    def serve(self):
        """ Loop forever, serving DHCP requests

        Besides the NFQUEUE descriptors the loop watches the inotify
        descriptor for configuration changes and triggers a round of
        periodic RAs every PERIODIC_RA_TIMEOUT seconds.

        """
        self.build_config()

        iwfd = self.notifier._fd

        start = time.time()
        timeout = PERIODIC_RA_TIMEOUT
        self.send_periodic_ra()

        while True:
            rlist, _, xlist = select(list(self.nfq) + [iwfd], [], [], timeout)
            # NOTE(review): no descriptor is registered for exceptional
            # conditions, so xlist is always empty here.
            if xlist:
                logging.warning("Warning: Exception on %s" %
                                ", ".join([str(fd) for fd in xlist]))

            if rlist:
                if iwfd in rlist:
                    # First check if there are any inotify
                    # (= configuration change) events
                    self.notifier.read_events()
                    self.notifier.process_events()
                    rlist.remove(iwfd)

                for fd in rlist:
                    try:
                        self.nfq[fd].process_pending()
                    except Exception as e:
                        logging.warning("Error processing fd %d: %s" %
                                        (fd, e))

            # Calculate the new timeout
            timeout = PERIODIC_RA_TIMEOUT - (time.time() - start)

            if timeout <= 0:
                start = time.time()
                self.send_periodic_ra()
                # Never hand a negative timeout to select()
                timeout = max(PERIODIC_RA_TIMEOUT - (time.time() - start), 0)
567
568
569
# Script entry point: parse options, optionally daemonize, set up logging,
# create the proxy while still privileged, then drop to an unprivileged
# user keeping only CAP_NET_RAW and serve forever.
if __name__ == "__main__":
    import optparse
    from capng import *
    from pwd import getpwnam, getpwuid

    # Default queue for neighbor solicitations; named so that the help
    # text and the actual default cannot drift apart.
    default_ns_queue = 44

    parser = optparse.OptionParser()
    parser.add_option("-p", "--path", dest="data_path",
                      help="The location of the data files", metavar="DIR",
                      default=DEFAULT_PATH)
    parser.add_option("-c", "--dhcp-queue", dest="dhcp_queue",
                      help="The nfqueue to receive DHCP requests from"
                           " (default: %d)" % DEFAULT_NFQUEUE_NUM, type="int",
                      metavar="NUM", default=DEFAULT_NFQUEUE_NUM)
    # NOTE(review): the router solicitation queue defaults to the same
    # number as the DHCP queue - confirm that this is intended.
    parser.add_option("-r", "--rs-queue", dest="rs_queue",
                      help="The nfqueue to receive IPv6 router"
                           " solicitations from (default: %d)" %
                           DEFAULT_NFQUEUE_NUM, type="int",
                      metavar="NUM", default=DEFAULT_NFQUEUE_NUM)
    parser.add_option("-n", "--ns-queue", dest="ns_queue",
                      help="The nfqueue to receive IPv6 neighbor"
                           " solicitations from (default: %d)" %
                           default_ns_queue, type="int",
                      metavar="NUM", default=default_ns_queue)
    parser.add_option("-u", "--user", dest="user",
                      help="An unprivileged user to run as",
                      metavar="UID", default=DEFAULT_USER)
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      help="Turn on debugging messages")
    parser.add_option("-f", "--foreground", action="store_false",
                      dest="daemonize", default=True,
                      help="Do not daemonize, stay in the foreground")

    opts, args = parser.parse_args()

    if opts.daemonize:
        d = daemon.DaemonContext()
        d.open()

    # Written after daemonizing, so that the recorded pid is the one of
    # the process that keeps running.
    with open("/var/run/nfdhcpd.pid", "w") as pidfile:
        pidfile.write("%s" % os.getpid())

    logger = logging.getLogger()
    if opts.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Log to a rotating file when daemonized, to the terminal otherwise
    if opts.daemonize:
        handler = logging.handlers.RotatingFileHandler(LOG_FILENAME,
                                                       maxBytes=2097152)
    else:
        handler = logging.StreamHandler()

    handler.setFormatter(logging.Formatter(LOG_FORMAT))
    logger.addHandler(handler)

    logging.info("Starting up")
    proxy = VMNetProxy(opts.data_path, opts.dhcp_queue,
                       opts.rs_queue, opts.ns_queue)

    # Drop all capabilities except CAP_NET_RAW and change uid.
    # The user may be given either as a numeric uid or as a name.
    try:
        uid = getpwuid(int(opts.user))
    except ValueError:
        uid = getpwnam(opts.user)

    logging.info("Setting capabilities and changing uid")
    logging.debug("User: %s, uid: %d, gid: %d" %
                  (opts.user, uid.pw_uid, uid.pw_gid))
    capng_clear(CAPNG_SELECT_BOTH)
    capng_update(CAPNG_ADD, CAPNG_EFFECTIVE|CAPNG_PERMITTED, CAP_NET_RAW)
    capng_change_id(uid.pw_uid, uid.pw_gid,
                    CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING)
    logging.info("Ready to serve requests")
    proxy.serve()
646
647
648 # vim: set ts=4 sts=4 sw=4 et :