Statistics
| Branch: | Tag: | Revision:

root / lib / watcher / nodemaint.py @ ce9283c1

History | View | Annotate | Download (4.8 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module doing node maintenance for Ganeti watcher.
23

24
"""
25

    
26
import logging
27

    
28
from ganeti import constants
29
from ganeti import errors
30
from ganeti import hypervisor
31
from ganeti import netutils
32
from ganeti import ssconf
33
from ganeti import utils
34
from ganeti import confd
35
from ganeti.block import bdev
36

    
37
import ganeti.confd.client # pylint: disable=W0611
38

    
39

    
40
class NodeMaintenance(object):
  """Talks to confd daemons and possibly shuts down instances/DRBD devices.

  The role of the local node is queried from confd; if the node is reported
  as offline, instances and DRBD minors still active on it are shut down.

  """
  def __init__(self):
    # The filter callback is built around the store callback and is the one
    # handed to the confd client; both are kept because Exec() reads the
    # stored response and the per-request consistency flag from them.
    self.store_cb = confd.client.StoreResultCallback()
    self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb)
    self.confd_client = confd.client.GetConfdClient(self.filter_cb)

  @staticmethod
  def ShouldRun():
    """Checks whether node maintenance should run.

    Returns the "maintain node health" value read from ssconf, or False
    (after logging the error) if reading it raises a configuration error.

    """
    try:
      return ssconf.SimpleStore().GetMaintainNodeHealth()
    except errors.ConfigurationError as err:
      logging.error("Configuration error, not activating node maintenance: %s",
                    err)
      return False

  @staticmethod
  def GetRunningInstances():
    """Compute list of hypervisor/running instances.

    Returns a list of (instance name, hypervisor name) tuples, built from
    every hypervisor listed in ssconf. A hypervisor whose lookup or listing
    fails is logged and skipped, so the result may be partial.

    """
    results = []
    for hv_name in ssconf.SimpleStore().GetHypervisorList():
      try:
        hv = hypervisor.GetHypervisor(hv_name)
        ilist = hv.ListInstances()
        results.extend([(iname, hv_name) for iname in ilist])
      # Best effort on purpose: one broken hypervisor must not hide the
      # instances of the others. "Exception" rather than a bare except so
      # that KeyboardInterrupt/SystemExit still propagate.
      except Exception: # pylint: disable=W0703
        logging.error("Error while listing instances for hypervisor %s",
                      hv_name, exc_info=True)
    return results

  @staticmethod
  def GetUsedDRBDs():
    """Get list of used DRBD minors.

    Returns the keys of the mapping reported by DRBD8.GetUsedDevs() as a
    list (an explicit list, so the result is not a live dictionary view).

    """
    return list(bdev.DRBD8.GetUsedDevs().keys())

  @classmethod
  def DoMaintenance(cls, role):
    """Bring the node in line with the given role.

    For the offline role, all running instances and all used DRBD minors are
    shut down, instances first. Any other role is only logged.

    """
    if role == constants.CONFD_NODE_ROLE_OFFLINE:
      inst_running = cls.GetRunningInstances()
      cls.ShutdownInstances(inst_running)
      drbd_running = cls.GetUsedDRBDs()
      cls.ShutdownDRBD(drbd_running)
    else:
      logging.debug("Not doing anything for role %s", role)

  @staticmethod
  def ShutdownInstances(inst_running):
    """Shutdown running instances.

    inst_running is an iterable of (instance name, hypervisor name) pairs,
    as returned by GetRunningInstances(). Each distinct instance name is
    force-stopped once through its hypervisor; an empty input is a no-op.

    """
    # this dictionary will collapse duplicate instance names (only
    # xen pvm/hvm) into a single key, which is fine
    i2h = dict(inst_running)
    if not i2h:
      return

    logging.info("Following instances should not be running,"
                 " shutting them down: %s", utils.CommaJoin(list(i2h)))
    for (name, hv_name) in i2h.items():
      hv = hypervisor.GetHypervisor(hv_name)
      hv.StopInstance(None, force=True, name=name)

  @staticmethod
  def ShutdownDRBD(drbd_running):
    """Shutdown active DRBD devices.

    drbd_running is a collection of DRBD minors, as returned by
    GetUsedDRBDs(); an empty collection is a no-op.

    """
    if not drbd_running:
      return

    logging.info("Following DRBD minors should not be active,"
                 " shutting them down: %s", utils.CommaJoin(drbd_running))
    for minor in drbd_running:
      # pylint: disable=W0212
      # using the private method as is, pending enhancements to the DRBD
      # interface
      bdev.DRBD8._ShutdownAll(minor)

  def Exec(self):
    """Check node status versus cluster desired state.

    Asks confd for the role of the local node (looked up by name) and passes
    the answer to DoMaintenance(). Nothing is done, apart from logging a
    warning, when confd is disabled, the query times out or the replies are
    flagged as inconsistent.

    """
    if not constants.ENABLE_CONFD:
      logging.warning("Confd use not enabled, cannot do maintenance")
      return

    my_name = netutils.Hostname.GetSysName()
    req = \
      confd.client.ConfdClientRequest(type=constants.CONFD_REQ_NODE_ROLE_BYNAME,
                                      query=my_name)
    # "async" is a reserved word from Python 3.7 on and cannot be written as
    # a literal keyword argument there; dict unpacking passes the very same
    # argument and parses on every Python version.
    # NOTE(review): coverage=-1 presumably means "ask all confd peers" --
    # confirm against the confd client documentation.
    self.confd_client.SendRequest(req, coverage=-1, **{"async": False})
    timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt)
    if timed_out:
      logging.warning("Confd query timed out, cannot do maintenance actions")
      return

    # should have a valid response
    status, result = self.store_cb.GetResponse(req.rsalt)
    assert status, "Missing result but received replies"
    if not self.filter_cb.consistent[req.rsalt]:
      logging.warning("Inconsistent replies, not doing anything")
      return
    self.DoMaintenance(result.server_reply.answer)