Statistics
| Branch: | Tag: | Revision:

root / lib / watcher / nodemaint.py @ 47e0abee

History | View | Annotate | Download (4.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module doing node maintenance for Ganeti watcher.
23

24
"""
25

    
26
import logging
27

    
28
from ganeti import constants
29
from ganeti import errors
30
from ganeti import hypervisor
31
from ganeti import netutils
32
from ganeti import ssconf
33
from ganeti import utils
34
from ganeti import confd
35
from ganeti.block import drbd
36

    
37
import ganeti.confd.client # pylint: disable=W0611
38

    
39

    
40
class NodeMaintenance(object):
  """Talks to confd daemons and possibly shuts down instances/DRBD devices.

  The confd client and its callbacks are created once per object; L{Exec}
  uses them to look up the role of the local node and, when confd reports
  the offline role, stops the instances and DRBD devices found on it.

  """
  def __init__(self):
    """Sets up the confd client together with its reply callbacks.

    """
    # filter_cb wraps store_cb and is what the client is given; Exec later
    # reads the reply from store_cb and the consistency flag from filter_cb
    self.store_cb = confd.client.StoreResultCallback()
    self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb)
    self.confd_client = confd.client.GetConfdClient(self.filter_cb)

  @staticmethod
  def ShouldRun():
    """Checks whether node maintenance should run.

    @return: whatever C{ssconf.SimpleStore().GetMaintainNodeHealth()}
        reports, or False when reading it raises a configuration error

    """
    try:
      return ssconf.SimpleStore().GetMaintainNodeHealth()
    except errors.ConfigurationError as err:
      logging.error("Configuration error, not activating node maintenance: %s",
                    err)
      return False

  @staticmethod
  def GetRunningInstances():
    """Compute list of hypervisor/running instances.

    Every hypervisor named by ssconf is queried on its own; a failure while
    querying one of them is logged and does not prevent the remaining ones
    from being queried.

    @rtype: list of tuples
    @return: (instance name, hypervisor name) pairs

    """
    hyp_list = ssconf.SimpleStore().GetHypervisorList()
    results = []
    for hv_name in hyp_list:
      try:
        hv = hypervisor.GetHypervisor(hv_name)
        ilist = hv.ListInstances()
        results.extend([(iname, hv_name) for iname in ilist])
      # deliberately broad (best effort per hypervisor), but unlike a bare
      # "except:" this lets KeyboardInterrupt and SystemExit through
      except Exception: # pylint: disable=W0703
        logging.error("Error while listing instances for hypervisor %s",
                      hv_name, exc_info=True)
    return results

  @staticmethod
  def GetUsedDRBDs():
    """Get list of used DRBD minors.

    @return: the result of C{drbd.DRBD8.GetUsedDevs()}, unchanged

    """
    return drbd.DRBD8.GetUsedDevs()

  @classmethod
  def DoMaintenance(cls, role):
    """Maintain the instance list.

    For the offline role, instances are stopped first and DRBD devices are
    shut down afterwards; any other role is only noted in the debug log.

    @param role: node role, compared against
        L{constants.CONFD_NODE_ROLE_OFFLINE}

    """
    if role == constants.CONFD_NODE_ROLE_OFFLINE:
      inst_running = cls.GetRunningInstances()
      cls.ShutdownInstances(inst_running)
      drbd_running = cls.GetUsedDRBDs()
      cls.ShutdownDRBD(drbd_running)
    else:
      logging.debug("Not doing anything for role %s", role)

  @staticmethod
  def ShutdownInstances(inst_running):
    """Shutdown running instances.

    @param inst_running: (instance name, hypervisor name) pairs, in the
        form returned by L{GetRunningInstances}; nothing is done (and
        nothing is logged) when it is empty

    """
    names_running = set(pair[0] for pair in inst_running)
    if not names_running:
      return

    logging.info("Following instances should not be running,"
                 " shutting them down: %s", utils.CommaJoin(names_running))
    # this dictionary will collapse duplicate instance names (only
    # xen pvm/hvm) into a single key, which is fine
    i2h = dict(inst_running)
    for name in names_running:
      hv = hypervisor.GetHypervisor(i2h[name])
      # no instance object is passed, the instance is identified by name
      hv.StopInstance(None, force=True, name=name)

  @staticmethod
  def ShutdownDRBD(drbd_running):
    """Shutdown active DRBD devices.

    @param drbd_running: DRBD minors, in the form returned by
        L{GetUsedDRBDs}; nothing is done (and nothing is logged) when it
        is empty

    """
    if not drbd_running:
      return

    logging.info("Following DRBD minors should not be active,"
                 " shutting them down: %s", utils.CommaJoin(drbd_running))
    for minor in drbd_running:
      drbd.DRBD8.ShutdownAll(minor)

  def Exec(self):
    """Check node status versus cluster desired state.

    Asks confd for the role of the local node and passes the answer to
    L{DoMaintenance}. No maintenance is done when confd is disabled, when
    the query times out or when the replies are flagged as inconsistent;
    each of these cases is logged as a warning.

    """
    if not constants.ENABLE_CONFD:
      logging.warning("Confd use not enabled, cannot do maintenance")
      return

    my_name = netutils.Hostname.GetSysName()
    req = \
      confd.client.ConfdClientRequest(type=constants.CONFD_REQ_NODE_ROLE_BYNAME,
                                      query=my_name)
    # "async" is a reserved keyword from Python 3.7 on, so writing it as a
    # literal keyword argument no longer parses there; dictionary unpacking
    # hands SendRequest exactly the same keyword argument
    # pylint: disable=W0142
    self.confd_client.SendRequest(req, coverage=-1, **{"async": False})
    timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt)
    if timed_out:
      logging.warning("Confd query timed out, cannot do maintenance actions")
      return

    # should have a valid response
    status, result = self.store_cb.GetResponse(req.rsalt)
    assert status, "Missing result but received replies"
    if not self.filter_cb.consistent[req.rsalt]:
      logging.warning("Inconsistent replies, not doing anything")
      return
    self.DoMaintenance(result.server_reply.answer)