from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
-from ganeti import config
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
-from ganeti import ssconf
from ganeti import serializer
self.sstore = sstore
self.context = context
self.needed_locks = None
+ self.share_locks = dict(((i, 0) for i in locking.LEVELS))
+ # Used to force good behavior when calling helper functions
+ self.recalculate_locks = {}
self.__ssh = None
for attr_name in self._OP_REQP:
(this reflects what LockSet does, and will be replaced before
CheckPrereq with the full list of nodes that have been locked)
+ If you need to share locks (rather than acquire them exclusively) at one
+ level you can modify self.share_locks, setting a true value (usually 1) for
+ that level. By default locks are not shared.
+
Examples:
# Acquire all nodes and one instance
self.needed_locks = {
else:
raise NotImplementedError
+ def DeclareLocks(self, level):
+ """Declare LU locking needs for a level
+
+ While most LUs can just declare their locking needs at ExpandNames time,
+ sometimes there's the need to calculate some locks after having acquired
+ the ones before. This function is called just before acquiring locks at a
+ particular level, but after acquiring the ones at lower levels, and permits
+ such calculations. It can be used to modify self.needed_locks, and by
+ default it does nothing.
+
+ This function is only called if you have something already set in
+ self.needed_locks for the level.
+
+ @param level: Locking level which is going to be locked
+ @type level: member of ganeti.locking.LEVELS
+
+ """
+
def CheckPrereq(self):
"""Check prerequisites for this LU.
self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
self.op.instance_name = expanded_name
+ def _LockInstancesNodes(self):
+ """Helper function to declare instances' nodes for locking.
+
+ This function should be called after locking one or more instances to lock
+ their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
+ with all primary or secondary nodes for instances already locked and
+ present in self.needed_locks[locking.LEVEL_INSTANCE].
+
+ It should be called from DeclareLocks, and for safety only works if
+ self.recalculate_locks[locking.LEVEL_NODE] is set.
+
+ In the future it may grow parameters to just lock some instance's nodes, or
+ to just lock primaries or secondary nodes, if needed.
+
+ If should be called in DeclareLocks in a way similar to:
+
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+
+ """
+ assert locking.LEVEL_NODE in self.recalculate_locks, \
+ "_LockInstancesNodes helper function called with no nodes to recalculate"
+
+ # TODO: check if we're really been called with the instance locks held
+
+ # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
+ # future we might want to have different behaviors depending on the value
+ # of self.recalculate_locks[locking.LEVEL_NODE]
+ wanted_nodes = []
+ for instance_name in self.needed_locks[locking.LEVEL_INSTANCE]:
+ instance = self.context.cfg.GetInstanceInfo(instance_name)
+ wanted_nodes.append(instance.primary_node)
+ wanted_nodes.extend(instance.secondary_nodes)
+ self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
+
+ del self.recalculate_locks[locking.LEVEL_NODE]
+
class NoHooksLU(LogicalUnit):
"""Simple LU which runs no hooks.
"""
master = self.sstore.GetMasterNode()
- if not rpc.call_node_stop_master(master):
+ if not rpc.call_node_stop_master(master, False):
raise errors.OpExecError("Could not disable the master role")
priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
utils.CreateBackup(priv_key)
lu_result: previous Exec result
"""
- # We only really run POST phase hooks, and are only interested in their results
+ # We only really run POST phase hooks, and are only interested in
+ # their results
if phase == constants.HOOKS_PHASE_POST:
# Used to change hooks' output to proper indentation
indent_re = re.compile('^', re.M)
# shutdown the master IP
master = ss.GetMasterNode()
- if not rpc.call_node_stop_master(master):
+ if not rpc.call_node_stop_master(master, False):
raise errors.OpExecError("Could not disable the master role")
try:
logger.Error("copy of file %s to node %s failed" %
(fname, to_node))
finally:
- if not rpc.call_node_start_master(master):
+ if not rpc.call_node_start_master(master, False):
logger.Error("Could not re-enable the master role on the master,"
" please restart manually.")
self.context.glm.add(locking.LEVEL_NODE, node)
-class LUMasterFailover(LogicalUnit):
- """Failover the master node to the current node.
-
- This is a special LU in that it must run on a non-master node.
-
- """
- HPATH = "master-failover"
- HTYPE = constants.HTYPE_CLUSTER
- REQ_MASTER = False
- REQ_WSSTORE = True
- _OP_REQP = []
-
- def BuildHooksEnv(self):
- """Build hooks env.
-
- This will run on the new master only in the pre phase, and on all
- the nodes in the post phase.
-
- """
- env = {
- "OP_TARGET": self.new_master,
- "NEW_MASTER": self.new_master,
- "OLD_MASTER": self.old_master,
- }
- return env, [self.new_master], self.cfg.GetNodeList()
-
- def CheckPrereq(self):
- """Check prerequisites.
-
- This checks that we are not already the master.
-
- """
- self.new_master = utils.HostInfo().name
- self.old_master = self.sstore.GetMasterNode()
-
- if self.old_master == self.new_master:
- raise errors.OpPrereqError("This commands must be run on the node"
- " where you want the new master to be."
- " %s is already the master" %
- self.old_master)
-
- def Exec(self, feedback_fn):
- """Failover the master node.
-
- This command, when run on a non-master node, will cause the current
- master to cease being master, and the non-master to become new
- master.
-
- """
- #TODO: do not rely on gethostname returning the FQDN
- logger.Info("setting master to %s, old master: %s" %
- (self.new_master, self.old_master))
-
- if not rpc.call_node_stop_master(self.old_master):
- logger.Error("could disable the master role on the old master"
- " %s, please disable manually" % self.old_master)
-
- ss = self.sstore
- ss.SetKey(ss.SS_MASTER_NODE, self.new_master)
- if not rpc.call_upload_file(self.cfg.GetNodeList(),
- ss.KeyToFilename(ss.SS_MASTER_NODE)):
- logger.Error("could not distribute the new simple store master file"
- " to the other nodes, please check.")
-
- if not rpc.call_node_start_master(self.new_master):
- logger.Error("could not start the master role on the new master"
- " %s, please check" % self.new_master)
- feedback_fn("Error in activating the master IP on the new master,"
- " please fix manually.")
-
-
-
class LUQueryClusterInfo(NoHooksLU):
"""Query cluster configuration.
HPATH = "instance-start"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "force"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
# check bridges existance
_CheckInstanceBridgesExist(instance)
"starting instance %s" % instance.name,
instance.memory)
- self.instance = instance
- self.op.instance_name = instance.name
-
def Exec(self, feedback_fn):
"""Start the instance.
HPATH = "instance-reboot"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
# check bridges existance
_CheckInstanceBridgesExist(instance)
- self.instance = instance
- self.op.instance_name = instance.name
-
def Exec(self, feedback_fn):
"""Reboot the instance.
HPATH = "instance-stop"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
- self.instance = instance
+ self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
def Exec(self, feedback_fn):
"""Shutdown the instance.
HPATH = "instance-reinstall"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster and is not running.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+
if instance.disk_template == constants.DT_DISKLESS:
raise errors.OpPrereqError("Instance '%s' has no disks" %
self.op.instance_name)
old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
self.cfg.RenameInstance(inst.name, self.op.new_name)
+ # Change the instance lock. This is definitely safe while we hold the BGL
+ self.context.glm.remove(locking.LEVEL_INSTANCE, inst.name)
+ self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
# re-read the instance from the configuration after rename
inst = self.cfg.GetInstanceInfo(self.op.new_name)
HPATH = "instance-failover"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "ignore_consistency"]
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = 'replace'
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This checks that the instance is in the cluster.
"""
- instance = self.cfg.GetInstanceInfo(
- self.cfg.ExpandInstanceName(self.op.instance_name))
- if instance is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
if instance.disk_template not in constants.DTS_NET_MIRROR:
raise errors.OpPrereqError("Instance's disk layout is not"
" exist on destination node '%s'" %
(brlist, target_node))
- self.instance = instance
-
def Exec(self, feedback_fn):
"""Failover an instance.
if self.op.shutdown:
# shutdown the instance, but not the disks
if not rpc.call_instance_shutdown(src_node, instance):
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, src_node))
+ raise errors.OpExecError("Could not shutdown instance %s on node %s" %
+ (instance.name, src_node))
vgname = self.cfg.GetVGName()
"""Returns the tag list.
"""
- return self.target.GetTags()
+ return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
if rcode == constants.IARUN_NOTFOUND:
raise errors.OpExecError("Can't find allocator '%s'" % name)
elif rcode == constants.IARUN_FAILURE:
- raise errors.OpExecError("Instance allocator call failed: %s,"
- " output: %s" %
- (fail, stdout+stderr))
+ raise errors.OpExecError("Instance allocator call failed: %s,"
+ " output: %s" % (fail, stdout+stderr))
self.out_text = stdout
if validate:
self._ValidateResult()