Revision 6dfcc47b: daemons/ganeti-watcher
old line | new line | b/daemons/ganeti-watcher
---|---|---|
225 | 225 |
cli.SubmitOpCode(op, cl=client) |
226 | 226 |
|
227 | 227 |
|
228 |
def GetInstanceList(with_secondaries=None):
|
|
228 |
def GetClusterData():
|
|
229 | 229 |
"""Get a list of instances on this cluster. |
230 | 230 |
|
231 | 231 |
""" |
232 |
fields = ["name", "status", "admin_state"] |
|
232 |
op1_fields = ["name", "status", "admin_state", "snodes"] |
|
233 |
op1 = opcodes.OpQueryInstances(output_fields=op1_fields, names=[], |
|
234 |
use_locking=True) |
|
235 |
op2_fields = ["name", "bootid", "offline"] |
|
236 |
op2 = opcodes.OpQueryNodes(output_fields=op2_fields, names=[], |
|
237 |
use_locking=True) |
|
233 | 238 |
|
234 |
if with_secondaries is not None: |
|
235 |
fields.append("snodes") |
|
239 |
job_id = client.SubmitJob([op1, op2]) |
|
236 | 240 |
|
237 |
result = client.QueryInstances([], fields, True)
|
|
241 |
all_results = cli.PollJob(job_id, cl=client, feedback_fn=logging.debug)
|
|
238 | 242 |
|
239 |
instances = [] |
|
240 |
for fields in result: |
|
241 |
if with_secondaries is not None: |
|
242 |
(name, status, autostart, snodes) = fields |
|
243 |
|
|
244 |
if not snodes: |
|
245 |
continue |
|
243 |
result = all_results[0] |
|
244 |
smap = {} |
|
246 | 245 |
|
247 |
for node in with_secondaries: |
|
248 |
if node in snodes: |
|
249 |
break |
|
250 |
else: |
|
251 |
continue |
|
252 |
|
|
253 |
else: |
|
254 |
(name, status, autostart) = fields |
|
246 |
instances = {} |
|
247 |
for fields in result: |
|
248 |
(name, status, autostart, snodes) = fields |
|
255 | 249 |
|
256 |
instances.append(Instance(name, status, autostart)) |
|
250 |
# update the secondary node map |
|
251 |
for node in snodes: |
|
252 |
if node not in smap: |
|
253 |
smap[node] = [] |
|
254 |
smap[node].append(name) |
|
257 | 255 |
|
258 |
return instances
|
|
256 |
instances[name] = Instance(name, status, autostart)
|
|
259 | 257 |
|
258 |
nodes = dict([(name, (bootid, offline)) |
|
259 |
for name, bootid, offline in all_results[1]]) |
|
260 | 260 |
|
261 |
def GetNodeBootIDs(): |
|
262 |
"""Get a dict mapping nodes to boot IDs. |
|
261 |
client.ArchiveJob(job_id) |
|
263 | 262 |
|
264 |
""" |
|
265 |
result = client.QueryNodes([], ["name", "bootid", "offline"], True) |
|
266 |
return dict([(name, (bootid, offline)) for name, bootid, offline in result]) |
|
263 |
return instances, nodes, smap |
|
267 | 264 |
|
268 | 265 |
|
269 | 266 |
class Watcher(object): |
... | ... | |
279 | 276 |
master = client.QueryConfigValues(["master_node"])[0] |
280 | 277 |
if master != utils.HostInfo().name: |
281 | 278 |
raise NotMasterError("This is not the master node") |
282 |
self.instances = GetInstanceList() |
|
283 |
self.bootids = GetNodeBootIDs() |
|
279 |
self.instances, self.bootids, self.smap = GetClusterData() |
|
284 | 280 |
self.started_instances = set() |
285 | 281 |
self.opts = opts |
286 | 282 |
|
... | ... | |
321 | 317 |
if check_nodes: |
322 | 318 |
# Activate disks for all instances with any of the checked nodes as a |
323 | 319 |
# secondary node. |
324 |
for instance in GetInstanceList(with_secondaries=check_nodes): |
|
325 |
if not instance.autostart: |
|
326 |
logging.info(("Skipping disk activation for non-autostart" |
|
327 |
" instance %s"), instance.name) |
|
328 |
continue |
|
329 |
if instance.name in self.started_instances: |
|
330 |
# we already tried to start the instance, which should have |
|
331 |
# activated its drives (if they can be at all) |
|
320 |
for node in check_nodes: |
|
321 |
if node not in self.smap: |
|
332 | 322 |
continue |
333 |
try: |
|
334 |
logging.info("Activating disks for instance %s", instance.name) |
|
335 |
instance.ActivateDisks() |
|
336 |
except Exception: |
|
337 |
logging.exception("Error while activating disks for instance %s", |
|
338 |
instance.name) |
|
323 |
for instance_name in self.smap[node]: |
|
324 |
instance = self.instances[instance_name] |
|
325 |
if not instance.autostart: |
|
326 |
logging.info(("Skipping disk activation for non-autostart" |
|
327 |
" instance %s"), instance.name) |
|
328 |
continue |
|
329 |
if instance.name in self.started_instances: |
|
330 |
# we already tried to start the instance, which should have |
|
331 |
# activated its drives (if they can be at all) |
|
332 |
continue |
|
333 |
try: |
|
334 |
logging.info("Activating disks for instance %s", instance.name) |
|
335 |
instance.ActivateDisks() |
|
336 |
except Exception: |
|
337 |
logging.exception("Error while activating disks for instance %s", |
|
338 |
instance.name) |
|
339 | 339 |
|
340 | 340 |
# Keep changed boot IDs |
341 | 341 |
for name in check_nodes: |
... | ... | |
345 | 345 |
"""Make a pass over the list of instances, restarting downed ones. |
346 | 346 |
|
347 | 347 |
""" |
348 |
for instance in self.instances: |
|
348 |
for instance in self.instances.values():
|
|
349 | 349 |
if instance.state in BAD_STATES: |
350 | 350 |
n = notepad.NumberOfRestartAttempts(instance) |
351 | 351 |
|
... | ... | |
383 | 383 |
|
384 | 384 |
""" |
385 | 385 |
op = opcodes.OpVerifyDisks() |
386 |
result = cli.SubmitOpCode(op, cl=client) |
|
386 |
job_id = client.SubmitJob([op]) |
|
387 |
result = cli.PollJob(job_id, cl=client, feedback_fn=logging.debug)[0] |
|
388 |
client.ArchiveJob(job_id) |
|
387 | 389 |
if not isinstance(result, (tuple, list)): |
388 | 390 |
logging.error("Can't get a valid result from verify-disks") |
389 | 391 |
return |
Also available in: Unified diff