projects
/
ganeti-local
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Import first version of Ganeti Remote API
[ganeti-local]
/
daemons
/
ganeti-watcher
diff --git a/daemons/ganeti-watcher b/daemons/ganeti-watcher
index c912592..8b2b183 100755 (executable)
--- a/daemons/ganeti-watcher
+++ b/daemons/ganeti-watcher
@@ -47,6 +47,9 @@ BAD_STATES = ['stopped']
HELPLESS_STATES = ['(node down)']
NOTICE = 'NOTICE'
ERROR = 'ERROR'
HELPLESS_STATES = ['(node down)']
NOTICE = 'NOTICE'
ERROR = 'ERROR'
+KEY_RESTART_COUNT = "restart_count"
+KEY_RESTART_WHEN = "restart_when"
+KEY_BOOT_ID = "bootid"
class Error(Exception):
class Error(Exception):
@@ -119,8 +122,8 @@ class WatcherState(object):
except Exception, msg:
# Ignore errors while loading the file and treat it as empty
self.data = {}
except Exception, msg:
# Ignore errors while loading the file and treat it as empty
self.data = {}
- sys.stderr.write("Empty or invalid state file. "
- "Using defaults. Error message: %s\n" % msg)
+ sys.stderr.write("Empty or invalid state file."
+ " Using defaults. Error message: %s\n" % msg)
if "instance" not in self.data:
self.data["instance"] = {}
if "instance" not in self.data:
self.data["instance"] = {}
@@ -151,8 +154,8 @@ class WatcherState(object):
"""
ndata = self.data["node"]
"""
ndata = self.data["node"]
- if name in ndata and "bootid" in ndata[name]:
- return ndata[name]["bootid"]
+ if name in ndata and KEY_BOOT_ID in ndata[name]:
+ return ndata[name][KEY_BOOT_ID]
return None
def SetNodeBootID(self, name, bootid):
return None
def SetNodeBootID(self, name, bootid):
@@ -166,7 +169,7 @@ class WatcherState(object):
if name not in ndata:
ndata[name] = {}
if name not in ndata:
ndata[name] = {}
- ndata[name]["bootid"] = bootid
+ ndata[name][KEY_BOOT_ID] = bootid
def NumberOfRestartAttempts(self, instance):
"""Returns number of previous restart attempts.
def NumberOfRestartAttempts(self, instance):
"""Returns number of previous restart attempts.
@@ -178,7 +181,7 @@ class WatcherState(object):
idata = self.data["instance"]
if instance.name in idata:
idata = self.data["instance"]
if instance.name in idata:
- return idata[instance.name]["restart_count"]
+ return idata[instance.name][KEY_RESTART_COUNT]
return 0
return 0
@@ -196,8 +199,8 @@ class WatcherState(object):
else:
inst = idata[instance.name]
else:
inst = idata[instance.name]
- inst["restart_when"] = time.time()
- inst["restart_count"] = idata.get("restart_count", 0) + 1
+ inst[KEY_RESTART_WHEN] = time.time()
+ inst[KEY_RESTART_COUNT] = inst.get(KEY_RESTART_COUNT, 0) + 1
def RemoveInstance(self, instance):
"""Update state to reflect that a machine is running, i.e. remove record.
def RemoveInstance(self, instance):
"""Update state to reflect that a machine is running, i.e. remove record.
@@ -346,6 +349,7 @@ class Watcher(object):
notepad = WatcherState()
self.CheckInstances(notepad)
self.CheckDisks(notepad)
notepad = WatcherState()
self.CheckInstances(notepad)
self.CheckDisks(notepad)
+ self.VerifyDisks()
notepad.Save()
def CheckDisks(self, notepad):
notepad.Save()
def CheckDisks(self, notepad):
@@ -364,9 +368,8 @@ class Watcher(object):
# secondary node.
for instance in GetInstanceList(with_secondaries=check_nodes):
try:
# secondary node.
for instance in GetInstanceList(with_secondaries=check_nodes):
try:
- self.messages.append(Message(NOTICE,
- "Activating disks for %s." %
- instance.name))
+ self.messages.append(Message(NOTICE, ("Activating disks for %s." %
+ instance.name)))
instance.ActivateDisks()
except Error, x:
self.messages.append(Message(ERROR, str(x)))
instance.ActivateDisks()
except Error, x:
self.messages.append(Message(ERROR, str(x)))
@@ -399,9 +402,8 @@ class Watcher(object):
(instance.name, MAXTRIES)))
continue
try:
(instance.name, MAXTRIES)))
continue
try:
- self.messages.append(Message(NOTICE,
- "Restarting %s%s." %
- (instance.name, last)))
+ self.messages.append(Message(NOTICE, ("Restarting %s%s." %
+ (instance.name, last))))
instance.Restart()
except Error, x:
self.messages.append(Message(ERROR, str(x)))
instance.Restart()
except Error, x:
self.messages.append(Message(ERROR, str(x)))
@@ -413,10 +415,17 @@ class Watcher(object):
else:
if notepad.NumberOfRestartAttempts(instance):
notepad.RemoveInstance(instance)
else:
if notepad.NumberOfRestartAttempts(instance):
notepad.RemoveInstance(instance)
- msg = Message(NOTICE,
- "Restart of %s succeeded." % instance.name)
+ msg = Message(NOTICE, "Restart of %s succeeded." % instance.name)
self.messages.append(msg)
self.messages.append(msg)
+ def VerifyDisks(self):
+ """Run gnt-cluster verify-disks.
+
+ """
+ result = DoCmd(['gnt-cluster', 'verify-disks', '--lock-retries=15'])
+ if result.output:
+ self.messages.append(Message(NOTICE, result.output))
+
def WriteReport(self, logfile):
"""Log all messages to file.
def WriteReport(self, logfile):
"""Log all messages to file.
@@ -457,7 +466,11 @@ def main():
sys.stderr = sys.stdout = open(constants.LOG_WATCHER, 'a')
try:
sys.stderr = sys.stdout = open(constants.LOG_WATCHER, 'a')
try:
- watcher = Watcher()
+ try:
+ watcher = Watcher()
+ except errors.ConfigurationError:
+ # Just exit if there's no configuration
+ sys.exit(constants.EXIT_SUCCESS)
watcher.Run()
watcher.WriteReport(sys.stdout)
except NotMasterError:
watcher.Run()
watcher.WriteReport(sys.stdout)
except NotMasterError: