# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""Module doing node maintenance for Ganeti watcher.

"""
import logging

from ganeti import bdev
from ganeti import constants
from ganeti import errors
from ganeti import hypervisor
from ganeti import netutils
from ganeti import ssconf
from ganeti import utils
from ganeti import confd

import ganeti.confd.client # pylint: disable=W0611
class NodeMaintenance(object):
  """Talks to confd daemons and possibly shuts down instances/DRBD devices.

  The node queries confd for its own role; when that role is "offline",
  the instances reported by the configured hypervisors and the DRBD minors
  in use are shut down.

  """
  # NOTE(review): the listing this class was recovered from had lost its
  # indentation and every short line (decorators, "try:"/"else:", bare
  # "return"s, a few "def" headers and the "query=" argument). Those were
  # restored from context. The method names ShouldRun and Exec are not
  # referenced anywhere else in this file -- confirm them against the callers.

  def __init__(self):
    """Sets up the confd client and its callback chain.

    The filter callback wraps the store callback, and the confd client is
    created around the filter callback.

    """
    self.store_cb = confd.client.StoreResultCallback()
    self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb)
    self.confd_client = confd.client.GetConfdClient(self.filter_cb)

  @staticmethod
  def ShouldRun():
    """Checks whether node maintenance should run.

    @return: the "maintain node health" value read from ssconf, or False
        if reading it raises a configuration error

    """
    try:
      return ssconf.SimpleStore().GetMaintainNodeHealth()
    except errors.ConfigurationError as err:
      logging.error("Configuration error, not activating node maintenance: %s",
                    err)
      return False

  @staticmethod
  def GetRunningInstances():
    """Compute list of hypervisor/running instances.

    A hypervisor that fails while being queried is logged (with traceback)
    and skipped, so that one broken hypervisor does not hide the instances
    of the others.

    @rtype: list
    @return: list of (instance name, hypervisor name) tuples

    """
    hyp_list = ssconf.SimpleStore().GetHypervisorList()
    results = []
    for hv_name in hyp_list:
      try:
        hv = hypervisor.GetHypervisor(hv_name)
        ilist = hv.ListInstances()
        results.extend([(iname, hv_name) for iname in ilist])
      # Deliberately broad (best effort per hypervisor), but no longer a
      # bare "except:" so SystemExit/KeyboardInterrupt still propagate.
      except Exception: # pylint: disable=W0703
        logging.error("Error while listing instances for hypervisor %s",
                      hv_name, exc_info=True)
    return results

  @staticmethod
  def GetUsedDRBDs():
    """Get list of used DRBD minors.

    @rtype: list
    @return: the keys of the mapping returned by DRBD8.GetUsedDevs()

    """
    # list() keeps the documented return type on interpreters where keys()
    # is a view rather than a list.
    return list(bdev.DRBD8.GetUsedDevs().keys())

  @classmethod
  def DoMaintenance(cls, role):
    """Maintain the instance list.

    Only the offline role triggers any action; every other role is just
    logged at debug level.

    @param role: node role as answered by confd

    """
    if role == constants.CONFD_NODE_ROLE_OFFLINE:
      inst_running = cls.GetRunningInstances()
      cls.ShutdownInstances(inst_running)
      drbd_running = cls.GetUsedDRBDs()
      cls.ShutdownDRBD(drbd_running)
    else:
      logging.debug("Not doing anything for role %s", role)

  @staticmethod
  def ShutdownInstances(inst_running):
    """Shutdown running instances.

    @type inst_running: list
    @param inst_running: (instance name, hypervisor name) tuples, as
        produced by L{GetRunningInstances}

    """
    # this dictionary will collapse duplicate instance names (only
    # xen pvm/vhm) into a single key, which is fine
    i2h = dict(inst_running)
    if not i2h:
      return

    logging.info("Following instances should not be running,"
                 " shutting them down: %s", utils.CommaJoin(sorted(i2h)))
    for (name, hv_name) in i2h.items():
      hv = hypervisor.GetHypervisor(hv_name)
      hv.StopInstance(None, force=True, name=name)

  @staticmethod
  def ShutdownDRBD(drbd_running):
    """Shutdown active DRBD devices.

    @param drbd_running: DRBD minors to shut down, as produced by
        L{GetUsedDRBDs}

    """
    if not drbd_running:
      return

    logging.info("Following DRBD minors should not be active,"
                 " shutting them down: %s", utils.CommaJoin(drbd_running))
    for minor in drbd_running:
      # pylint: disable=W0212
      # using the private method as is, pending enhancements to the DRBD
      # interface
      bdev.DRBD8._ShutdownAll(minor)

  def Exec(self):
    """Check node status versus cluster desired state.

    Sends a synchronous "node role by name" request for this host to confd
    and hands the answer to L{DoMaintenance}. Nothing is done when confd is
    disabled, when the query times out, or when the filter callback marks
    the replies as inconsistent.

    """
    if not constants.ENABLE_CONFD:
      logging.warning("Confd use not enabled, cannot do maintenance")
      return

    my_name = netutils.Hostname.GetSysName()
    req = confd.client.ConfdClientRequest(
      type=constants.CONFD_REQ_NODE_ROLE_BYNAME, query=my_name)
    # "async" is a reserved word from Python 3.7 on; passing it through a
    # dict sends the very same keyword argument while keeping this module
    # parseable by every interpreter.
    self.confd_client.SendRequest(req, coverage=-1, **{"async": False})
    timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt)
    if timed_out:
      logging.warning("Confd query timed out, cannot do maintenance actions")
      return

    # should have a valid response
    status, result = self.store_cb.GetResponse(req.rsalt)
    assert status, "Missing result but received replies"
    if not self.filter_cb.consistent[req.rsalt]:
      logging.warning("Inconsistent replies, not doing anything")
      return
    self.DoMaintenance(result.server_reply.answer)