Statistics
| Branch: | Tag: | Revision:

root / lib / watcher / nodemaint.py @ fb62843c

History | View | Annotate | Download (4.7 kB)

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
20

    
21

    
22
"""Module doing node maintenance for Ganeti watcher.

"""
25

    
26
import logging
27

    
28
from ganeti import constants
29
from ganeti import errors
30
from ganeti import hypervisor
31
from ganeti import netutils
32
from ganeti import ssconf
33
from ganeti import utils
34
from ganeti import confd
35
from ganeti.storage import drbd
36

    
37
import ganeti.confd.client # pylint: disable=W0611
38

    
39

    
40
class NodeMaintenance(object):
  """Talks to confd daemons and possibly shuts down instances/DRBD devices.

  L{Exec} asks confd for this node's role and hands the answer to
  L{DoMaintenance}; when the role is C{constants.CONFD_NODE_ROLE_OFFLINE}
  the running instances and the used DRBD minors are shut down.

  """
  def __init__(self):
    """Creates the confd client together with its callbacks.

    The filter callback wraps the store callback and is the one handed to
    the confd client; L{Exec} reads results back from both of them.

    """
    self.store_cb = confd.client.StoreResultCallback()
    self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb)
    self.confd_client = confd.client.GetConfdClient(self.filter_cb)

  @staticmethod
  def ShouldRun():
    """Checks whether node maintenance should run.

    @return: whatever C{ssconf.SimpleStore().GetMaintainNodeHealth()}
        returns, or C{False} if reading it raised
        L{errors.ConfigurationError} (the error is logged)

    """
    try:
      return ssconf.SimpleStore().GetMaintainNodeHealth()
    except errors.ConfigurationError as err:
      logging.error("Configuration error, not activating node maintenance: %s",
                    err)
      return False

  @staticmethod
  def GetRunningInstances():
    """Compute list of hypervisor/running instances.

    Every hypervisor named in the ssconf hypervisor list is asked for its
    instances. A hypervisor whose listing fails is logged (with traceback)
    and skipped, so the result may be partial.

    @rtype: list
    @return: list of C{(instance_name, hypervisor_name)} tuples

    """
    hyp_list = ssconf.SimpleStore().GetHypervisorList()
    hvparams = ssconf.SimpleStore().GetHvparams()
    results = []
    for hv_name in hyp_list:
      try:
        hv = hypervisor.GetHypervisor(hv_name)
        ilist = hv.ListInstances(hvparams=hvparams)
        results.extend([(iname, hv_name) for iname in ilist])
      # Best effort: keep going with the other hypervisors on any ordinary
      # error, but let KeyboardInterrupt/SystemExit propagate
      except Exception: # pylint: disable=W0703
        logging.error("Error while listing instances for hypervisor %s",
                      hv_name, exc_info=True)
    return results

  @staticmethod
  def GetUsedDRBDs():
    """Get list of used DRBD minors.

    @return: the result of C{drbd.DRBD8.GetUsedDevs()}

    """
    return drbd.DRBD8.GetUsedDevs()

  @classmethod
  def DoMaintenance(cls, role):
    """Maintain the instance list.

    @param role: node role as answered by confd; only
        C{constants.CONFD_NODE_ROLE_OFFLINE} triggers any action, every
        other role is merely logged at debug level

    """
    if role == constants.CONFD_NODE_ROLE_OFFLINE:
      # Instances are stopped before the DRBD minors are shut down
      inst_running = cls.GetRunningInstances()
      cls.ShutdownInstances(inst_running)
      drbd_running = cls.GetUsedDRBDs()
      cls.ShutdownDRBD(drbd_running)
    else:
      logging.debug("Not doing anything for role %s", role)

  @staticmethod
  def ShutdownInstances(inst_running):
    """Shutdown running instances.

    @param inst_running: sequence of C{(instance_name, hypervisor_name)}
        pairs, as returned by L{GetRunningInstances}; nothing is done if it
        is empty

    """
    names_running = set(entry[0] for entry in inst_running)
    if names_running:
      logging.info("Following instances should not be running,"
                   " shutting them down: %s", utils.CommaJoin(names_running))
      # this dictionary will collapse duplicate instance names (only
      # xen pvm/hvm) into a single key, which is fine
      i2h = dict(inst_running)
      for name in names_running:
        hv_name = i2h[name]
        hv = hypervisor.GetHypervisor(hv_name)
        # None is passed in place of an instance object; the instance is
        # identified through the name keyword only
        hv.StopInstance(None, force=True, name=name)

  @staticmethod
  def ShutdownDRBD(drbd_running):
    """Shutdown active DRBD devices.

    @param drbd_running: collection of DRBD minors, as returned by
        L{GetUsedDRBDs}; nothing is done if it is empty or C{None}

    """
    if drbd_running:
      logging.info("Following DRBD minors should not be active,"
                   " shutting them down: %s", utils.CommaJoin(drbd_running))
      for minor in drbd_running:
        drbd.DRBD8.ShutdownAll(minor)

  def Exec(self):
    """Check node status versus cluster desired state.

    Sends a node-role-by-name request for the local host name to confd,
    waits for the replies and, if they arrived in time and were
    consistent, calls L{DoMaintenance} with the answer. In every other
    case a warning is logged and nothing is done.

    """
    if not constants.ENABLE_CONFD:
      logging.warning("Confd use not enabled, cannot do maintenance")
      return

    my_name = netutils.Hostname.GetSysName()
    req = \
      confd.client.ConfdClientRequest(type=constants.CONFD_REQ_NODE_ROLE_BYNAME,
                                      query=my_name)
    # "async" is a reserved word as of Python 3.7, so writing it as a
    # literal keyword argument would stop this module from even parsing
    # there; passing it through a dict hands the callee the very same
    # keyword argument.
    self.confd_client.SendRequest(req, coverage=-1, **{"async": False})
    timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt)
    if timed_out:
      logging.warning("Confd query timed out, cannot do maintenance actions")
      return

    # should have a valid response
    status, result = self.store_cb.GetResponse(req.rsalt)
    assert status, "Missing result but received replies"
    if not self.filter_cb.consistent[req.rsalt]:
      logging.warning("Inconsistent replies, not doing anything")
      return
    self.DoMaintenance(result.server_reply.answer)