Revision 38242904 daemons/ganeti-watcher

b/daemons/ganeti-watcher
39 39
import time
40 40
import fcntl
41 41
import errno
42
import socket
42 43
from optparse import OptionParser
43 44

  
44 45

  
45 46
from ganeti import utils
46 47
from ganeti import constants
48
from ganeti import ssconf
47 49

  
48 50

  
49 51
class Error(Exception):
50 52
  """Generic custom error class."""
51
  pass
53

  
54

  
55
class NotMasterError(Error):
56
  """Exception raised when this host is not the master."""
52 57

  
53 58

  
54 59
def Indent(s, prefix='| '):
......
57 62
  Args:
58 63
    s: The string to indent
59 64
    prefix: The string to prepend each line.
65

  
60 66
  """
61 67
  return "%s%s\n" % (prefix, ('\n' + prefix).join(s.splitlines()))
62 68

  
......
68 74
    cmd: the command to run.
69 75

  
70 76
  Raises CommandError with verbose commentary on error.
77

  
71 78
  """
72 79
  res = utils.RunCmd(cmd)
73 80

  
......
97 104
    Remove(name): remove record given by name, if exists.
98 105

  
99 106
    Save(name): saves all records to file, releases lock and closes file.
107

  
100 108
  """
101 109
  def __init__(self):
102 110
    # The two-step dance below is necessary to allow both opening existing
......
128 136

  
129 137
    Args:
130 138
      instance - the instance to look up.
139

  
131 140
    """
132 141
    assert self.statefile
133 142

  
......
141 150

  
142 151
    Args:
143 152
      instance - the instance being restarted
153

  
144 154
    """
145 155
    assert self.statefile
146 156

  
......
149 159
    self.inst_map[instance.name] = (when, 1 + self.NumberOfAttempts(instance))
150 160

  
151 161
  def Remove(self, instance):
152
    """Update state to reflect that a machine is running, i.e. remove record
162
    """Update state to reflect that a machine is running, i.e. remove record.
153 163

  
154 164
    Args:
155 165
      instance - the instance to remove from books
156 166

  
157
    This method removes the record for a named instance
167
    This method removes the record for a named instance.
168

  
158 169
    """
159 170
    assert self.statefile
160 171

  
......
163 174

  
164 175
  def Save(self):
165 176
    """Save records to file, then unlock and close file.
177

  
166 178
    """
167 179
    assert self.statefile
168 180

  
......
194 206

  
195 207
class InstanceList(object):
196 208
  """The set of Virtual Machine instances on a cluster.
209

  
197 210
  """
198 211
  cmd = ['gnt-instance', 'list', '--lock-retries=15',
199 212
         '-o', 'name,admin_state,oper_state', '--no-headers', '--separator=:']
......
221 234

  
222 235
class Message(object):
223 236
  """Encapsulation of a notice or error message.
237

  
224 238
  """
225 239
  def __init__(self, level, msg):
226 240
    self.level = level
......
237 251
  The calling program should periodically instantiate me and call Run().
238 252
  This will traverse the list of instances, and make up to MAXTRIES attempts
239 253
  to restart machines that are down.
254

  
240 255
  """
241 256
  def __init__(self):
257
    sstore = ssconf.SimpleStore()
258
    master = sstore.GetMasterNode()
259
    if master != socket.gethostname():
260
      raise NotMasterError, ("This is not the master node")
242 261
    self.instances = InstanceList()
243 262
    self.messages = []
244 263

  
245 264
  def Run(self):
246 265
    """Make a pass over the list of instances, restarting downed ones.
266

  
247 267
    """
248 268
    notepad = RestarterState()
249 269

  
......
284 304
    notepad.Save()
285 305

  
286 306
  def WriteReport(self, logfile):
287
    """
288
    Log all messages to file.
307
    """Log all messages to file.
289 308

  
290 309
    Args:
291 310
      logfile: file object open for writing (the log file)
311

  
292 312
    """
293 313
    for msg in self.messages:
294 314
      print >> logfile, str(msg)
......
326 346
    restarter = Restarter()
327 347
    restarter.Run()
328 348
    restarter.WriteReport(sys.stdout)
349
  except NotMasterError:
350
    if options.debug:
351
      sys.stderr.write("Not master, exiting.\n")
352
    sys.exit(constants.EXIT_NOTMASTER)
329 353
  except Error, err:
330 354
    print err
331 355

  

Also available in: Unified diff