Revision 38242904 daemons/ganeti-watcher
b/daemons/ganeti-watcher | ||
---|---|---|
39 | 39 |
import time |
40 | 40 |
import fcntl |
41 | 41 |
import errno |
42 |
import socket |
|
42 | 43 |
from optparse import OptionParser |
43 | 44 |
|
44 | 45 |
|
45 | 46 |
from ganeti import utils |
46 | 47 |
from ganeti import constants |
48 |
from ganeti import ssconf |
|
47 | 49 |
|
48 | 50 |
|
49 | 51 |
class Error(Exception):
  """Generic custom error class.

  Base class for the errors defined in this script, so that callers can
  catch all of them with a single "except Error" clause.
  """


class NotMasterError(Error):
  """Exception raised when this host is not the master."""
|
52 | 57 |
|
53 | 58 |
|
54 | 59 |
def Indent(s, prefix='| '):
  """Prefix every line of a string.

  Args:
    s: The string to indent
    prefix: The string to prepend each line.

  Returns:
    The lines of s, each preceded by prefix and joined by newlines, with
    a single trailing newline. An empty s yields just the prefix followed
    by a newline.

  """
  lines = s.splitlines()
  separator = '\n' + prefix
  # The first line gets the prefix explicitly; the separator supplies it
  # for every following line.
  return prefix + separator.join(lines) + '\n'
62 | 68 |
|
... | ... | |
68 | 74 |
cmd: the command to run. |
69 | 75 |
|
70 | 76 |
Raises CommandError with verbose commentary on error. |
77 |
|
|
71 | 78 |
""" |
72 | 79 |
res = utils.RunCmd(cmd) |
73 | 80 |
|
... | ... | |
97 | 104 |
Remove(name): remove record given by name, if exists. |
98 | 105 |
|
99 | 106 |
Save(name): saves all records to file, releases lock and closes file. |
107 |
|
|
100 | 108 |
""" |
101 | 109 |
def __init__(self): |
102 | 110 |
# The two-step dance below is necessary to allow both opening existing |
... | ... | |
128 | 136 |
|
129 | 137 |
Args: |
130 | 138 |
instance - the instance to look up. |
139 |
|
|
131 | 140 |
""" |
132 | 141 |
assert self.statefile |
133 | 142 |
|
... | ... | |
141 | 150 |
|
142 | 151 |
Args: |
143 | 152 |
instance - the instance being restarted |
153 |
|
|
144 | 154 |
""" |
145 | 155 |
assert self.statefile |
146 | 156 |
|
... | ... | |
149 | 159 |
self.inst_map[instance.name] = (when, 1 + self.NumberOfAttempts(instance)) |
150 | 160 |
|
151 | 161 |
def Remove(self, instance): |
152 |
"""Update state to reflect that a machine is running, i.e. remove record |
|
162 |
"""Update state to reflect that a machine is running, i.e. remove record.
|
|
153 | 163 |
|
154 | 164 |
Args: |
155 | 165 |
instance - the instance to remove from books |
156 | 166 |
|
157 |
This method removes the record for a named instance |
|
167 |
This method removes the record for a named instance. |
|
168 |
|
|
158 | 169 |
""" |
159 | 170 |
assert self.statefile |
160 | 171 |
|
... | ... | |
163 | 174 |
|
164 | 175 |
def Save(self): |
165 | 176 |
"""Save records to file, then unlock and close file. |
177 |
|
|
166 | 178 |
""" |
167 | 179 |
assert self.statefile |
168 | 180 |
|
... | ... | |
194 | 206 |
|
195 | 207 |
class InstanceList(object): |
196 | 208 |
"""The set of Virtual Machine instances on a cluster. |
209 |
|
|
197 | 210 |
""" |
198 | 211 |
cmd = ['gnt-instance', 'list', '--lock-retries=15', |
199 | 212 |
'-o', 'name,admin_state,oper_state', '--no-headers', '--separator=:'] |
... | ... | |
221 | 234 |
|
222 | 235 |
class Message(object): |
223 | 236 |
"""Encapsulation of a notice or error message. |
237 |
|
|
224 | 238 |
""" |
225 | 239 |
def __init__(self, level, msg): |
226 | 240 |
self.level = level |
... | ... | |
237 | 251 |
The calling program should periodically instantiate me and call Run(). |
238 | 252 |
This will traverse the list of instances, and make up to MAXTRIES attempts |
239 | 253 |
to restart machines that are down. |
254 |
|
|
240 | 255 |
""" |
241 | 256 |
def __init__(self):
  """Set up the restarter, refusing to run on a non-master node.

  Reads the master node name from the simple store and compares it with
  this host's name; on a match, loads the instance list and starts with
  an empty message list.

  Raises:
    NotMasterError: if the stored master node name differs from the
      value returned by socket.gethostname()

  """
  sstore = ssconf.SimpleStore()
  master = sstore.GetMasterNode()
  # NOTE(review): socket.gethostname() may return a short host name; if
  # the stored master name is fully qualified this check would always
  # fail -- confirm both sides use the same form.
  if master != socket.gethostname():
    # Call form instead of the deprecated "raise Class, value" statement.
    raise NotMasterError("This is not the master node")
  self.instances = InstanceList()
  self.messages = []
244 | 263 |
|
245 | 264 |
def Run(self): |
246 | 265 |
"""Make a pass over the list of instances, restarting downed ones. |
266 |
|
|
247 | 267 |
""" |
248 | 268 |
notepad = RestarterState() |
249 | 269 |
|
... | ... | |
284 | 304 |
notepad.Save() |
285 | 305 |
|
286 | 306 |
def WriteReport(self, logfile): |
287 |
""" |
|
288 |
Log all messages to file. |
|
307 |
"""Log all messages to file. |
|
289 | 308 |
|
290 | 309 |
Args: |
291 | 310 |
logfile: file object open for writing (the log file) |
311 |
|
|
292 | 312 |
""" |
293 | 313 |
for msg in self.messages: |
294 | 314 |
print >> logfile, str(msg) |
... | ... | |
326 | 346 |
restarter = Restarter() |
327 | 347 |
restarter.Run() |
328 | 348 |
restarter.WriteReport(sys.stdout) |
349 |
except NotMasterError: |
|
350 |
if options.debug: |
|
351 |
sys.stderr.write("Not master, exiting.\n") |
|
352 |
sys.exit(constants.EXIT_NOTMASTER) |
|
329 | 353 |
except Error, err: |
330 | 354 |
print err |
331 | 355 |
|
Also available in: Unified diff