Revision 24edc6d4 daemons/ganeti-watcher
b/daemons/ganeti-watcher | ||
---|---|---|
298 | 298 |
master = client.QueryConfigValues(["master_node"])[0] |
299 | 299 |
if master != utils.HostInfo().name: |
300 | 300 |
raise NotMasterError("This is not the master node") |
301 |
# first archive old jobs |
|
302 |
self.ArchiveJobs(opts.job_age) |
|
303 |
# and only then submit new ones |
|
301 | 304 |
self.instances, self.bootids, self.smap = GetClusterData() |
302 | 305 |
self.started_instances = set() |
303 | 306 |
self.opts = opts |
... | ... | |
307 | 310 |
|
308 | 311 |
""" |
309 | 312 |
notepad = self.notepad |
310 |
self.ArchiveJobs(self.opts.job_age) |
|
311 | 313 |
self.CheckInstances(notepad) |
312 | 314 |
self.CheckDisks(notepad) |
313 | 315 |
self.VerifyDisks() |
314 | 316 |
|
315 |
def ArchiveJobs(self, age): |
|
317 |
@staticmethod |
|
318 |
def ArchiveJobs(age): |
|
316 | 319 |
"""Archive old jobs. |
317 | 320 |
|
318 | 321 |
""" |
... | ... | |
459 | 462 |
utils.SetupLogging(constants.LOG_WATCHER, debug=options.debug, |
460 | 463 |
stderr_logging=options.debug) |
461 | 464 |
|
462 |
update_file = True
|
|
465 |
update_file = False
|
|
463 | 466 |
try: |
464 | 467 |
notepad = WatcherState() |
465 | 468 |
try: |
... | ... | |
468 | 471 |
except errors.OpPrereqError: |
469 | 472 |
# this is, from cli.GetClient, a not-master case |
470 | 473 |
logging.debug("Not on master, exiting") |
474 |
update_file = True |
|
471 | 475 |
sys.exit(constants.EXIT_SUCCESS) |
472 | 476 |
except luxi.NoMasterError, err: |
473 | 477 |
logging.warning("Master seems to be down (%s), trying to restart", |
474 | 478 |
str(err)) |
475 | 479 |
if not StartMaster(): |
476 | 480 |
logging.critical("Can't start the master, exiting") |
477 |
update_file = False |
|
478 | 481 |
sys.exit(constants.EXIT_FAILURE) |
479 | 482 |
# else retry the connection |
480 | 483 |
client = cli.GetClient() |
... | ... | |
483 | 486 |
watcher = Watcher(options, notepad) |
484 | 487 |
except errors.ConfigurationError: |
485 | 488 |
# Just exit if there's no configuration |
489 |
update_file = True |
|
486 | 490 |
sys.exit(constants.EXIT_SUCCESS) |
487 | 491 |
|
488 | 492 |
watcher.Run() |
493 |
update_file = True |
|
494 |
|
|
489 | 495 |
finally: |
490 | 496 |
if update_file: |
491 | 497 |
notepad.Save() |
... | ... | |
499 | 505 |
except errors.ResolverError, err: |
500 | 506 |
logging.error("Cannot resolve hostname '%s', exiting.", err.args[0]) |
501 | 507 |
sys.exit(constants.EXIT_NODESETUP_ERROR) |
508 |
except errors.JobQueueFull: |
|
509 |
logging.error("Job queue is full, can't query cluster state") |
|
510 |
except errors.JobQueueDrainError: |
|
511 |
logging.error("Job queue is drained, can't maintain cluster state") |
|
502 | 512 |
except Exception, err: |
503 | 513 |
logging.error(str(err), exc_info=True) |
504 | 514 |
sys.exit(constants.EXIT_FAILURE) |
Also available in: Unified diff