#
#
-# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
from ganeti import objects
from ganeti import ssconf
from ganeti import ht
+from ganeti import pathutils
import ganeti.rapi.client # pylint: disable=W0611
+from ganeti.rapi.client import UsesRapiClient
from ganeti.watcher import nodemaint
from ganeti.watcher import state
MAXTRIES = 5
-BAD_STATES = frozenset([
+BAD_STATES = compat.UniqueFrozenset([
constants.INSTST_ERRORDOWN,
])
-HELPLESS_STATES = frozenset([
+HELPLESS_STATES = compat.UniqueFrozenset([
constants.INSTST_NODEDOWN,
constants.INSTST_NODEOFFLINE,
])
"""Check whether we should pause.
"""
- return bool(utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE))
+ return bool(utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE))
def StartNodeDaemons():
# on master or not, try to start the node daemon
utils.EnsureDaemon(constants.NODED)
# start confd as well. On non candidates it will be in disabled mode.
- utils.EnsureDaemon(constants.CONFD)
+ if constants.ENABLE_CONFD:
+ utils.EnsureDaemon(constants.CONFD)
+ # start mond as well: all nodes need monitoring
+ if constants.ENABLE_MOND:
+ utils.EnsureDaemon(constants.MOND)
def RunWatcherHooks():
"""Run the watcher hooks.
"""
- hooks_dir = utils.PathJoin(constants.HOOKS_BASE_DIR,
+ hooks_dir = utils.PathJoin(pathutils.HOOKS_BASE_DIR,
constants.HOOKS_NAME_WATCHER)
if not os.path.isdir(hooks_dir):
return
try:
results = utils.RunParts(hooks_dir)
- except Exception: # pylint: disable=W0703
- logging.exception("RunParts %s failed: %s", hooks_dir)
+ except Exception, err: # pylint: disable=W0703
+ logging.exception("RunParts %s failed: %s", hooks_dir, err)
return
for (relname, status, runresult) in results:
"""Abstraction for a Virtual Machine instance.
"""
- def __init__(self, name, status, autostart, snodes):
+ def __init__(self, name, status, disks_active, snodes):
self.name = name
self.status = status
- self.autostart = autostart
+ self.disks_active = disks_active
self.snodes = snodes
def Restart(self, cl):
instance_name)
continue
- if not inst.autostart:
- logging.info("Skipping disk activation for non-autostart"
- " instance '%s'", inst.name)
+ if not inst.disks_active:
+ logging.info("Skipping disk activation for instance with not"
+ " activated disks '%s'", inst.name)
continue
if inst.name in started:
continue
if inst.status in HELPLESS_STATES or _CheckForOfflineNodes(nodes, inst):
- logging.info("Skipping instance '%s' because it is in a helpless state or"
- " has offline secondaries", name)
+ logging.info("Skipping instance '%s' because it is in a helpless state"
+ " or has offline secondaries", name)
continue
job.append(opcodes.OpInstanceActivateDisks(instance_name=name))
parser.add_option("--wait-children", dest="wait_children",
action="store_true", help="Wait for child processes")
parser.add_option("--no-wait-children", dest="wait_children",
- action="store_false", help="Don't wait for child processes")
+ action="store_false",
+ help="Don't wait for child processes")
# See optparse documentation for why default values are not set by options
parser.set_defaults(wait_children=True)
options, args = parser.parse_args()
for inst in instances])
-class _StatCb:
- """Helper to store file handle's C{fstat}.
-
- """
- def __init__(self):
- """Initializes this class.
-
- """
- self.st = None
-
- def __call__(self, fh):
- """Calls C{fstat} on file handle.
-
- """
- self.st = os.fstat(fh.fileno())
-
-
def _ReadInstanceStatus(filename):
"""Reads an instance status file.
"""
logging.debug("Reading per-group instance status from '%s'", filename)
- statcb = _StatCb()
+ statcb = utils.FileStatHelper()
try:
content = utils.ReadFile(filename, preread=statcb)
except EnvironmentError, err:
raise NotMasterError("This is not the master node")
-@rapi.client.UsesRapiClient
+@UsesRapiClient
def _GlobalWatcher(opts):
"""Main function for global watcher.
job = [
# Get all primary instances in group
opcodes.OpQuery(what=constants.QR_INSTANCE,
- fields=["name", "status", "admin_state", "snodes",
+ fields=["name", "status", "disks_active", "snodes",
"pnode.group.uuid", "snodes.group.uuid"],
- filter=[qlang.OP_EQUAL, "pnode.group.uuid", uuid],
+ qfilter=[qlang.OP_EQUAL, "pnode.group.uuid", uuid],
use_locking=True),
# Get all nodes in group
opcodes.OpQuery(what=constants.QR_NODE,
fields=["name", "bootid", "offline"],
- filter=[qlang.OP_EQUAL, "group.uuid", uuid],
+ qfilter=[qlang.OP_EQUAL, "group.uuid", uuid],
use_locking=True),
]
instances = []
# Load all instances
- for (name, status, autostart, snodes, pnode_group_uuid,
+ for (name, status, disks_active, snodes, pnode_group_uuid,
snodes_group_uuid) in raw_instances:
if snodes and set([pnode_group_uuid]) != set(snodes_group_uuid):
logging.error("Ignoring split instance '%s', primary group %s, secondary"
" groups %s", name, pnode_group_uuid,
utils.CommaJoin(snodes_group_uuid))
else:
- instances.append(Instance(name, status, autostart, snodes))
+ instances.append(Instance(name, status, disks_active, snodes))
for node in snodes:
secondaries.setdefault(node, set()).add(name)
raise errors.GenericError("Node group '%s' is not known by ssconf" %
group_uuid)
- # Group UUID has been verified and should not contain any dangerous characters
- state_path = constants.WATCHER_GROUP_STATE_FILE % group_uuid
- inst_status_path = constants.WATCHER_GROUP_INSTANCE_STATUS_FILE % group_uuid
+ # Group UUID has been verified and should not contain any dangerous
+ # characters
+ state_path = pathutils.WATCHER_GROUP_STATE_FILE % group_uuid
+ inst_status_path = pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE % group_uuid
logging.debug("Using state file %s", state_path)
# Update per-group instance status file
_UpdateInstanceStatus(inst_status_path, instances.values())
- _MergeInstanceStatus(constants.INSTANCE_STATUS_FILE,
- constants.WATCHER_GROUP_INSTANCE_STATUS_FILE,
+ _MergeInstanceStatus(pathutils.INSTANCE_STATUS_FILE,
+ pathutils.WATCHER_GROUP_INSTANCE_STATUS_FILE,
known_groups)
started = _CheckInstances(client, notepad, instances)
"""
(options, _) = ParseOptions()
- utils.SetupLogging(constants.LOG_WATCHER, sys.argv[0],
+ utils.SetupLogging(pathutils.LOG_WATCHER, sys.argv[0],
debug=options.debug, stderr_logging=options.debug)
if ShouldPause() and not options.ignore_pause:
return constants.EXIT_SUCCESS
# Try to acquire global watcher lock in shared mode
- lock = utils.FileLock.Open(constants.WATCHER_LOCK_FILE)
+ lock = utils.FileLock.Open(pathutils.WATCHER_LOCK_FILE)
try:
lock.Shared(blocking=False)
except (EnvironmentError, errors.LockError), err:
logging.error("Can't acquire lock on %s: %s",
- constants.WATCHER_LOCK_FILE, err)
+ pathutils.WATCHER_LOCK_FILE, err)
return constants.EXIT_SUCCESS
if options.nodegroup is None: