Statistics
| Branch: | Tag: | Revision:

root / lib / watcher / nodemaint.py @ 9ca87fb3

History | View | Annotate | Download (4.7 kB)

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module doing node maintenance for Ganeti watcher.

"""
25

    
26
import logging
27

    
28
from ganeti import bdev
29
from ganeti import constants
30
from ganeti import errors
31
from ganeti import hypervisor
32
from ganeti import netutils
33
from ganeti import ssconf
34
from ganeti import utils
35
from ganeti import confd
36

    
37
import ganeti.confd.client # pylint: disable-msg=W0611
38

    
39

    
40
class NodeMaintenance(object):
  """Talks to confd daemons and possibly shuts down instances/DRBD devices.

  The node asks confd for its own role; if the answer is
  C{constants.CONFD_NODE_ROLE_OFFLINE}, every instance reported by the
  enabled hypervisors and every used DRBD minor is shut down. Any other
  role results in no action.

  """
  def __init__(self):
    """Builds the confd client together with its callback chain.

    The filter callback wraps the store callback; L{Exec} later reads the
    stored reply from C{store_cb} and the consistency flag from
    C{filter_cb}.

    """
    self.store_cb = confd.client.StoreResultCallback()
    self.filter_cb = confd.client.ConfdFilterCallback(self.store_cb)
    self.confd_client = confd.client.GetConfdClient(self.filter_cb)

  @staticmethod
  def ShouldRun():
    """Checks whether node maintenance should run.

    @return: the value of the "maintain node health" setting read from
        ssconf, or C{False} if reading it raised a configuration error

    """
    try:
      return ssconf.SimpleStore().GetMaintainNodeHealth()
    except errors.ConfigurationError as err:
      logging.error("Configuration error, not activating node maintenance: %s",
                    err)
      return False

  @staticmethod
  def GetRunningInstances():
    """Compute list of hypervisor/running instances.

    A hypervisor that fails to list its instances is logged and skipped, so
    that one broken hypervisor does not hide the instances of the others.

    @return: list of C{(instance_name, hypervisor_name)} tuples

    """
    results = []
    for hv_name in ssconf.SimpleStore().GetHypervisorList():
      try:
        hv = hypervisor.GetHypervisor(hv_name)
        results.extend((iname, hv_name) for iname in hv.ListInstances())
      # Best-effort on purpose, but let KeyboardInterrupt/SystemExit through
      except Exception: # pylint: disable-msg=W0703
        logging.error("Error while listing instances for hypervisor %s",
                      hv_name, exc_info=True)
    return results

  @staticmethod
  def GetUsedDRBDs():
    """Get list of used DRBD minors.

    @return: list of the keys of C{bdev.DRBD8.GetUsedDevs()}

    """
    # list() keeps the return type a real list regardless of whether
    # dict.keys() yields a list or a view
    return list(bdev.DRBD8.GetUsedDevs().keys())

  @classmethod
  def DoMaintenance(cls, role):
    """Maintain the instance list.

    @param role: node role as answered by confd; only
        C{constants.CONFD_NODE_ROLE_OFFLINE} triggers any action

    """
    if role != constants.CONFD_NODE_ROLE_OFFLINE:
      logging.debug("Not doing anything for role %s", role)
      return

    cls.ShutdownInstances(cls.GetRunningInstances())
    cls.ShutdownDRBD(cls.GetUsedDRBDs())

  @staticmethod
  def ShutdownInstances(inst_running):
    """Shutdown running instances.

    @param inst_running: iterable of C{(instance_name, hypervisor_name)}
        pairs, as returned by L{GetRunningInstances}

    """
    # this dictionary will collapse duplicate instance names (only
    # xen pvm/vhm) into a single key, which is fine
    i2h = dict(inst_running)
    if not i2h:
      return

    logging.info("Following instances should not be running,"
                 " shutting them down: %s", utils.CommaJoin(sorted(i2h)))
    for name, hv_name in i2h.items():
      hv = hypervisor.GetHypervisor(hv_name)
      hv.StopInstance(None, force=True, name=name)

  @staticmethod
  def ShutdownDRBD(drbd_running):
    """Shutdown active DRBD devices.

    @param drbd_running: collection of DRBD minors to shut down

    """
    if not drbd_running:
      return

    logging.info("Following DRBD minors should not be active,"
                 " shutting them down: %s", utils.CommaJoin(drbd_running))
    for minor in drbd_running:
      # pylint: disable-msg=W0212
      # using the private method as is, pending enhancements to the DRBD
      # interface
      bdev.DRBD8._ShutdownAll(minor)

  def Exec(self):
    """Check node status versus cluster desired state.

    Queries confd for the role of this node (looked up by its system
    hostname) and hands the answer to L{DoMaintenance}. Nothing is done
    when the query times out or when the filter callback flags the replies
    as inconsistent.

    """
    my_name = netutils.Hostname.GetSysName()
    req = confd.client.ConfdClientRequest(
      type=constants.CONFD_REQ_NODE_ROLE_BYNAME, query=my_name)

    # "async" is a reserved word from Python 3.7 on, so writing it as a
    # literal keyword argument makes the whole module unparseable there;
    # passing it through a dict is equivalent for the callee.
    # NOTE(review): coverage=-1 is kept as-is; its meaning is defined by
    # the confd client, not visible here.
    self.confd_client.SendRequest(req, coverage=-1, **{"async": False})
    timed_out, _, _ = self.confd_client.WaitForReply(req.rsalt)
    if timed_out:
      logging.warning("Confd query timed out, cannot do maintenance actions")
      return

    # should have a valid response
    status, result = self.store_cb.GetResponse(req.rsalt)
    assert status, "Missing result but received replies"
    if not self.filter_cb.consistent[req.rsalt]:
      logging.warning("Inconsistent replies, not doing anything")
      return

    self.DoMaintenance(result.server_reply.answer)