import time
import fcntl
import errno
+import socket
from optparse import OptionParser
from ganeti import utils
from ganeti import constants
+from ganeti import ssconf
class Error(Exception):
"""Generic custom error class."""
- pass
+
+
+class NotMasterError(Error):
+ """Exception raised when this host is not the master.
+
+ Raised while constructing the restarter when the master node name read
+ through ssconf differs from socket.gethostname(). The top-level handler
+ catches it and exits with constants.EXIT_NOTMASTER.
+
+ """
def Indent(s, prefix='| '):
Args:
s: The string to indent
prefix: The string to prepend each line.
+
"""
return "%s%s\n" % (prefix, ('\n' + prefix).join(s.splitlines()))
cmd: the command to run.
Raises CommandError with verbose commentary on error.
+
"""
res = utils.RunCmd(cmd)
Remove(name): remove record given by name, if exists.
Save(name): saves all records to file, releases lock and closes file.
+
"""
def __init__(self):
# The two-step dance below is necessary to allow both opening existing
fcntl.flock(f.fileno(), fcntl.LOCK_EX|fcntl.LOCK_NB)
except IOError, x:
if x.errno == errno.EAGAIN:
- raise StandardError('State file already locked')
+ raise StandardError("State file already locked")
raise
self.statefile = f
Args:
instance - the instance to look up.
+
"""
assert self.statefile
Args:
instance - the instance being restarted
+
"""
assert self.statefile
self.inst_map[instance.name] = (when, 1 + self.NumberOfAttempts(instance))
def Remove(self, instance):
- """Update state to reflect that a machine is running, i.e. remove record
+ """Update state to reflect that a machine is running, i.e. remove record.
Args:
instance - the instance to remove from books
- This method removes the record for a named instance
+ This method removes the record for a named instance.
+
"""
assert self.statefile
def Save(self):
"""Save records to file, then unlock and close file.
+
"""
assert self.statefile
Methods:
Restart(): issue a command to restart the represented machine.
+
"""
def __init__(self, name, state):
self.name = name
self.state = state
def Restart(self):
+ """Encapsulates the start of an instance.
+
+ This is currently done using the command line interface and not
+ the Ganeti modules: it hands the command
+ "gnt-instance startup --lock-retries=15 <self.name>" to DoCmd.
+
+ """
DoCmd(['gnt-instance', 'startup', '--lock-retries=15', self.name])
class InstanceList(object):
"""The set of Virtual Machine instances on a cluster.
+
"""
cmd = ['gnt-instance', 'list', '--lock-retries=15',
'-o', 'name,admin_state,oper_state', '--no-headers', '--separator=:']
class Message(object):
"""Encapsulation of a notice or error message.
+
"""
def __init__(self, level, msg):
self.level = level
The calling program should periodically instantiate me and call Run().
This will traverse the list of instances, and make up to MAXTRIES attempts
to restart machines that are down.
+
"""
def __init__(self):
+ # Refuse to run anywhere but on the master node: compare the master name
+ # obtained through ssconf with this host's name and raise on mismatch.
+ # NOTE(review): socket.gethostname() may return an unqualified name;
+ # confirm it has the same form as the value GetMasterNode() returns.
+ sstore = ssconf.SimpleStore()
+ master = sstore.GetMasterNode()
+ if master != socket.gethostname():
+ raise NotMasterError("This is not the master node")
self.instances = InstanceList()
self.messages = []
def Run(self):
"""Make a pass over the list of instances, restarting downed ones.
+
"""
notepad = RestarterState()
notepad.Save()
def WriteReport(self, logfile):
- """
- Log all messages to file.
+ """Log all messages to file.
Args:
logfile: file object open for writing (the log file)
+
"""
for msg in self.messages:
print >> logfile, str(msg)
restarter = Restarter()
restarter.Run()
restarter.WriteReport(sys.stdout)
+ except NotMasterError:
+ if options.debug:
+ sys.stderr.write("Not master, exiting.\n")
+ sys.exit(constants.EXIT_NOTMASTER)
except Error, err:
print err