#
#
-# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
from ganeti import objects
from ganeti import ssconf
from ganeti import ht
+from ganeti import pathutils
import ganeti.rapi.client # pylint: disable=W0611
+from ganeti.rapi.client import UsesRapiClient
from ganeti.watcher import nodemaint
from ganeti.watcher import state
MAXTRIES = 5
-BAD_STATES = frozenset([
+BAD_STATES = compat.UniqueFrozenset([
constants.INSTST_ERRORDOWN,
])
-HELPLESS_STATES = frozenset([
+HELPLESS_STATES = compat.UniqueFrozenset([
constants.INSTST_NODEDOWN,
constants.INSTST_NODEOFFLINE,
])
"""Check whether we should pause.
"""
- return bool(utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE))
+ return bool(utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE))
def StartNodeDaemons():
# start confd as well. On non candidates it will be in disabled mode.
if constants.ENABLE_CONFD:
utils.EnsureDaemon(constants.CONFD)
+ # start mond as well: all nodes need monitoring
+ if constants.ENABLE_MOND:
+ utils.EnsureDaemon(constants.MOND)
def RunWatcherHooks():
"""Run the watcher hooks.
"""
- hooks_dir = utils.PathJoin(constants.HOOKS_BASE_DIR,
+ hooks_dir = utils.PathJoin(pathutils.HOOKS_BASE_DIR,
constants.HOOKS_NAME_WATCHER)
if not os.path.isdir(hooks_dir):
return
"""Abstraction for a Virtual Machine instance.
"""
- def __init__(self, name, status, autostart, snodes):
+ def __init__(self, name, status, disks_active, snodes):
self.name = name
self.status = status
- self.autostart = autostart
+ self.disks_active = disks_active
self.snodes = snodes
def Restart(self, cl):
instance_name)
continue
- if not inst.autostart:
- logging.info("Skipping disk activation for non-autostart"
- " instance '%s'", inst.name)
+ if not inst.disks_active:
+ logging.info("Skipping disk activation for instance with not"
+ " activated disks '%s'", inst.name)
continue
if inst.name in started:
raise NotMasterError("This is not the master node")
-@rapi.client.UsesRapiClient
+@UsesRapiClient
def _GlobalWatcher(opts):
"""Main function for global watcher.
job = [
# Get all primary instances in group
opcodes.OpQuery(what=constants.QR_INSTANCE,
- fields=["name", "status", "admin_state", "snodes",
+ fields=["name", "status", "disks_active", "snodes",
"pnode.group.uuid", "snodes.group.uuid"],
qfilter=[qlang.OP_EQUAL, "pnode.group.uuid", uuid],
use_locking=True),
instances = []
# Load all instances
- for (name, status, autostart, snodes, pnode_group_uuid,
+ for (name, status, disks_active, snodes, pnode_group_uuid,
snodes_group_uuid) in raw_instances:
if snodes and set([pnode_group_uuid]) != set(snodes_group_uuid):
logging.error("Ignoring split instance '%s', primary group %s, secondary"
" groups %s", name, pnode_group_uuid,
utils.CommaJoin(snodes_group_uuid))
else:
- instances.append(Instance(name, status, autostart, snodes))
+ instances.append(Instance(name, status, disks_active, snodes))
for node in snodes:
secondaries.setdefault(node, set()).add(name)
# Group UUID has been verified and should not contain any dangerous
# characters
- state_path = constants.WATCHER_GROUP_STATE_FILE % group_uuid
- inst_status_path = constants.WATCHER_GROUP_INSTANCE_STATUS_FILE % group_uuid
+ state_path = pathutils.WATCHER_GROUP_STATE_FILE % group_uuid
+ inst_status_path = pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE % group_uuid
logging.debug("Using state file %s", state_path)
# Update per-group instance status file
_UpdateInstanceStatus(inst_status_path, instances.values())
- _MergeInstanceStatus(constants.INSTANCE_STATUS_FILE,
- constants.WATCHER_GROUP_INSTANCE_STATUS_FILE,
+ _MergeInstanceStatus(pathutils.INSTANCE_STATUS_FILE,
+ pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE,
known_groups)
started = _CheckInstances(client, notepad, instances)
"""
(options, _) = ParseOptions()
- utils.SetupLogging(constants.LOG_WATCHER, sys.argv[0],
+ utils.SetupLogging(pathutils.LOG_WATCHER, sys.argv[0],
debug=options.debug, stderr_logging=options.debug)
if ShouldPause() and not options.ignore_pause:
return constants.EXIT_SUCCESS
# Try to acquire global watcher lock in shared mode
- lock = utils.FileLock.Open(constants.WATCHER_LOCK_FILE)
+ lock = utils.FileLock.Open(pathutils.WATCHER_LOCK_FILE)
try:
lock.Shared(blocking=False)
except (EnvironmentError, errors.LockError), err:
logging.error("Can't acquire lock on %s: %s",
- constants.WATCHER_LOCK_FILE, err)
+ pathutils.WATCHER_LOCK_FILE, err)
return constants.EXIT_SUCCESS
if options.nodegroup is None: