HELPLESS_STATES = ['(node down)']
NOTICE = 'NOTICE'
ERROR = 'ERROR'
+KEY_RESTART_COUNT = "restart_count"  # per-instance record: number of restart attempts so far
+KEY_RESTART_WHEN = "restart_when"  # per-instance record: time.time() of the latest restart attempt
+KEY_BOOT_ID = "bootid"  # per-node record: boot ID stored for that node
class Error(Exception):
except Exception, msg:
# Ignore errors while loading the file and treat it as empty
self.data = {}
- sys.stderr.write("Empty or invalid state file. "
- "Using defaults. Error message: %s\n" % msg)
+ sys.stderr.write("Empty or invalid state file."
+ " Using defaults. Error message: %s\n" % msg)
if "instance" not in self.data:
self.data["instance"] = {}
"""
ndata = self.data["node"]
- if name in ndata and "bootid" in ndata[name]:
- return ndata[name]["bootid"]
+ if name in ndata and KEY_BOOT_ID in ndata[name]:
+ return ndata[name][KEY_BOOT_ID]
return None
def SetNodeBootID(self, name, bootid):
if name not in ndata:
ndata[name] = {}
- ndata[name]["bootid"] = bootid
+ ndata[name][KEY_BOOT_ID] = bootid
def NumberOfRestartAttempts(self, instance):
"""Returns number of previous restart attempts.
idata = self.data["instance"]
if instance.name in idata:
- return idata[instance.name]["restart_count"]
+ return idata[instance.name][KEY_RESTART_COUNT]
return 0
else:
inst = idata[instance.name]
- inst["restart_when"] = time.time()
- inst["restart_count"] = idata.get("restart_count", 0) + 1
+ inst[KEY_RESTART_WHEN] = time.time()
+ inst[KEY_RESTART_COUNT] = inst.get(KEY_RESTART_COUNT, 0) + 1
def RemoveInstance(self, instance):
"""Update state to reflect that a machine is running, i.e. remove record.
notepad = WatcherState()
self.CheckInstances(notepad)
self.CheckDisks(notepad)
+ self.VerifyDisks()
notepad.Save()
def CheckDisks(self, notepad):
# secondary node.
for instance in GetInstanceList(with_secondaries=check_nodes):
try:
- self.messages.append(Message(NOTICE,
- "Activating disks for %s." %
- instance.name))
+ self.messages.append(Message(NOTICE, ("Activating disks for %s." %
+ instance.name)))
instance.ActivateDisks()
except Error, x:
self.messages.append(Message(ERROR, str(x)))
(instance.name, MAXTRIES)))
continue
try:
- self.messages.append(Message(NOTICE,
- "Restarting %s%s." %
- (instance.name, last)))
+ self.messages.append(Message(NOTICE, ("Restarting %s%s." %
+ (instance.name, last))))
instance.Restart()
except Error, x:
self.messages.append(Message(ERROR, str(x)))
else:
if notepad.NumberOfRestartAttempts(instance):
notepad.RemoveInstance(instance)
- msg = Message(NOTICE,
- "Restart of %s succeeded." % instance.name)
+ msg = Message(NOTICE, "Restart of %s succeeded." % instance.name)
self.messages.append(msg)
+ def VerifyDisks(self):
+ """Run gnt-cluster verify-disks.
+
+ The command is executed through DoCmd with "--lock-retries=15". If it
+ produces any output, that output is appended to self.messages as a
+ NOTICE message; with empty output nothing is recorded. The method
+ does not return a value.
+
+ Only the output of the command is inspected here. Whether DoCmd
+ itself signals a failing exit status is not visible from this method
+ and should be confirmed against DoCmd.
+
+ """
+ result = DoCmd(['gnt-cluster', 'verify-disks', '--lock-retries=15'])
+ if result.output:  # stay silent when the command printed nothing
+ self.messages.append(Message(NOTICE, result.output))
+
def WriteReport(self, logfile):
"""Log all messages to file.
sys.stderr = sys.stdout = open(constants.LOG_WATCHER, 'a')
try:
- watcher = Watcher()
+ try:
+ watcher = Watcher()
+ except errors.ConfigurationError:
+ # Just exit if there's no configuration
+ sys.exit(constants.EXIT_SUCCESS)
watcher.Run()
watcher.WriteReport(sys.stdout)
except NotMasterError: