def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
- """Check the node time.
+ """Check the node LVM results.
@type ninfo: L{objects.Node}
@param ninfo: the node to check
_ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
" '%s' of VG '%s'", pvname, owner_vg)
+ def _VerifyNodeBridges(self, ninfo, nresult, bridges):
+ """Check the node bridges.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param bridges: the expected list of bridges
+
+ """
+ if not bridges:
+ return
+
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
+
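+    # the NV_BRIDGES result holds the list of bridges the node reports
+    # as missing (empty when all expected bridges exist)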
+ missing = nresult.get(constants.NV_BRIDGES, None)
+ test = not isinstance(missing, list)
+ _ErrorIf(test, self.ENODENET, node,
+ "did not return valid bridge information")
+ if not test:
+ _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
+ utils.CommaJoin(sorted(missing)))
+
def _VerifyNodeNetwork(self, ninfo, nresult):
- """Check the node time.
+ """Check the node network connectivity results.
@type ninfo: L{objects.Node}
@param ninfo: the node to check
drbd_helper = self.cfg.GetDRBDHelper()
hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
cluster = self.cfg.GetClusterInfo()
- nodelist = utils.NiceSort(self.cfg.GetNodeList())
- nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
- nodeinfo_byname = dict(zip(nodelist, nodeinfo))
- instancelist = utils.NiceSort(self.cfg.GetInstanceList())
- instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
- for iname in instancelist)
+ nodeinfo_byname = self.cfg.GetAllNodesInfo()
+ nodelist = utils.NiceSort(nodeinfo_byname.keys())
+ nodeinfo = [nodeinfo_byname[nname] for nname in nodelist]
+ instanceinfo = self.cfg.GetAllInstancesInfo()
+ instancelist = utils.NiceSort(instanceinfo.keys())
groupinfo = self.cfg.GetAllNodeGroupsInfo()
i_non_redundant = [] # Non redundant instances
i_non_a_balanced = [] # Non auto-balanced instances
if drbd_helper:
node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
+ # bridge checks
+ # FIXME: this needs to be changed per node-group, not cluster-wide
+ bridges = set()
+ default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
+ if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
+ bridges.add(default_nicpp[constants.NIC_LINK])
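+    # also collect the bridges used by the instances' NICs, with the
+    # cluster defaults filled in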
+ for instance in instanceinfo.values():
+ for nic in instance.nics:
+ full_nic = cluster.SimpleFillNIC(nic.nicparams)
+ if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
+ bridges.add(full_nic[constants.NIC_LINK])
+
+ if bridges:
+ node_verify_param[constants.NV_BRIDGES] = list(bridges)
+
# Build our expected cluster state
node_image = dict((node.name, self.NodeImage(offline=node.offline,
name=node.name,
if refos_img is None:
refos_img = nimg
self._VerifyNodeOS(node_i, nimg, refos_img)
+ self._VerifyNodeBridges(node_i, nresult, bridges)
feedback_fn("* Verifying instance status")
for instance in instancelist:
# if we're moving instances to routed, check that they have an ip
target_mode = params_filled[constants.NIC_MODE]
if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
- nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
- (instance.name, nic_idx))
+          nic_errors.append("Instance %s, nic/%d: routed NIC with no IP"
+                            " address" % (instance.name, nic_idx))
if nic_errors:
raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
"\n".join(nic_errors))
instance = self.instance
force = self.op.force
- self.cfg.MarkInstanceUp(instance.name)
+ if not self.op.no_remember:
+ self.cfg.MarkInstanceUp(instance.name)
if self.primary_offline:
assert self.op.ignore_offline_nodes
node_current = instance.primary_node
timeout = self.op.timeout
- self.cfg.MarkInstanceDown(instance.name)
+ if not self.op.no_remember:
+ self.cfg.MarkInstanceDown(instance.name)
if self.primary_offline:
assert self.op.ignore_offline_nodes
HTYPE = constants.HTYPE_INSTANCE
REQ_BGL = False
+ def CheckArguments(self):
+    # normalise the disk list (sort and remove duplicates)
+ self.op.disks = sorted(frozenset(self.op.disks))
+
def ExpandNames(self):
self._ExpandAndLockInstance()
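+    # append (rather than replace) the node locks, so the explicitly
+    # requested nodes below survive the recalculation in DeclareLocks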
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+ if self.op.nodes:
+ self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
+ self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
+ else:
+ self.needed_locks[locking.LEVEL_NODE] = []
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ # if we replace the nodes, we only need to lock the old primary,
+ # otherwise we need to lock all nodes for disk re-creation
+ primary_only = bool(self.op.nodes)
+ self._LockInstancesNodes(primary_only=primary_only)
def BuildHooksEnv(self):
"""Build hooks env.
instance = self.cfg.GetInstanceInfo(self.op.instance_name)
assert instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
- _CheckNodeOnline(self, instance.primary_node)
+ if self.op.nodes:
+ if len(self.op.nodes) != len(instance.all_nodes):
+ raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
+ " %d replacement nodes were specified" %
+ (instance.name, len(instance.all_nodes),
+ len(self.op.nodes)),
+ errors.ECODE_INVAL)
+ assert instance.disk_template != constants.DT_DRBD8 or \
+ len(self.op.nodes) == 2
+ assert instance.disk_template != constants.DT_PLAIN or \
+ len(self.op.nodes) == 1
+ primary_node = self.op.nodes[0]
+ else:
+ primary_node = instance.primary_node
+ _CheckNodeOnline(self, primary_node)
if instance.disk_template == constants.DT_DISKLESS:
raise errors.OpPrereqError("Instance '%s' has no disks" %
self.op.instance_name, errors.ECODE_INVAL)
- _CheckInstanceDown(self, instance, "cannot recreate disks")
+    # if we replace nodes *and* the old primary is offline, we skip the
+    # instance-is-down check, as it cannot be queried on an offline node
+ assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
+ old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
+ if not (self.op.nodes and old_pnode.offline):
+ _CheckInstanceDown(self, instance, "cannot recreate disks")
if not self.op.disks:
self.op.disks = range(len(instance.disks))
if idx >= len(instance.disks):
raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
errors.ECODE_INVAL)
-
+ if self.op.disks != range(len(instance.disks)) and self.op.nodes:
+ raise errors.OpPrereqError("Can't recreate disks partially and"
+ " change the nodes at the same time",
+ errors.ECODE_INVAL)
self.instance = instance
def Exec(self, feedback_fn):
"""Recreate the disks.
"""
+ # change primary node, if needed
+ if self.op.nodes:
+ self.instance.primary_node = self.op.nodes[0]
+      self.LogWarning("Changing the instance's nodes, you will have to"
+                      " remove any disks left on the old nodes manually")
+
to_skip = []
- for idx, _ in enumerate(self.instance.disks):
+ for idx, disk in enumerate(self.instance.disks):
if idx not in self.op.disks: # disk idx has not been passed in
to_skip.append(idx)
continue
+ # update secondaries for disks, if needed
+ if self.op.nodes:
+ if disk.dev_type == constants.LD_DRBD8:
+ # need to update the nodes
+ assert len(self.op.nodes) == 2
+ logical_id = list(disk.logical_id)
+ logical_id[0] = self.op.nodes[0]
+ logical_id[1] = self.op.nodes[1]
+ disk.logical_id = tuple(logical_id)
+
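+    # if the nodes were changed, save the updated primary node and disk
+    # node information before (re-)creating the disks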
+ if self.op.nodes:
+ self.cfg.Update(self.instance, feedback_fn)
_CreateDisks(self, self.instance, to_skip=to_skip)
new_name = self.op.new_name
if self.op.name_check:
hostname = netutils.GetHostname(name=new_name)
- self.LogInfo("Resolved given name '%s' to '%s'", new_name,
- hostname.name)
+      if hostname.name != new_name:
+ self.LogInfo("Resolved given name '%s' to '%s'", new_name,
+ hostname.name)
new_name = self.op.new_name = hostname.name
if (self.op.ip_check and
netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
raise errors.OpPrereqError("Invalid file driver name '%s'" %
self.op.file_driver, errors.ECODE_INVAL)
- if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
- raise errors.OpPrereqError("File storage directory path not absolute",
- errors.ECODE_INVAL)
+ if self.op.disk_template == constants.DT_FILE:
+ opcodes.RequireFileStorage()
### Node/iallocator related checks
_CheckIAllocatorOrNode(self, "iallocator", "pnode")
if name in os_defs and os_defs[name] == self.op.osparams[name]:
del self.op.osparams[name]
+ def _CalculateFileStorageDir(self):
+ """Calculate final instance file storage dir.
+
+ """
+ # file storage dir calculation/check
+ self.instance_file_storage_dir = None
+ if self.op.disk_template == constants.DT_FILE:
+ # build the full file storage dir path
+ joinargs = []
+
+ cfg_storagedir = self.cfg.GetFileStorageDir()
+ if not cfg_storagedir:
+ raise errors.OpPrereqError("Cluster file storage dir not defined")
+ joinargs.append(cfg_storagedir)
+
+ if self.op.file_storage_dir is not None:
+ joinargs.append(self.op.file_storage_dir)
+
+ joinargs.append(self.op.instance_name)
+
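+      # utils.PathJoin rejects backtracking components, so the result
+      # stays below the cluster file storage directory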
+ # pylint: disable-msg=W0142
+ self.instance_file_storage_dir = utils.PathJoin(*joinargs)
+
def CheckPrereq(self):
"""Check prerequisites.
"""
+ self._CalculateFileStorageDir()
+
if self.op.mode == constants.INSTANCE_IMPORT:
export_info = self._ReadExportInfo()
self._ReadExportParams(export_info)
else:
network_port = None
- if constants.ENABLE_FILE_STORAGE:
- # this is needed because os.path.join does not accept None arguments
- if self.op.file_storage_dir is None:
- string_file_storage_dir = ""
- else:
- string_file_storage_dir = self.op.file_storage_dir
-
- # build the full file storage dir path
- file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
- string_file_storage_dir, instance)
- else:
- file_storage_dir = ""
-
disks = _GenerateDiskTemplate(self,
self.op.disk_template,
instance, pnode_name,
self.secondaries,
self.disks,
- file_storage_dir,
+ self.instance_file_storage_dir,
self.op.file_driver,
0,
feedback_fn)
instance.FindDisk(disk_idx)
# Get secondary node IP addresses
- node_2nd_ip = {}
-
- for node_name in [self.target_node, self.other_node, self.new_node]:
- if node_name is not None:
- node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
-
- self.node_secondary_ip = node_2nd_ip
+ self.node_secondary_ip = \
+ dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
+ for node_name in touched_nodes
+ if node_name is not None)
def Exec(self, feedback_fn):
"""Execute disk replacement.
else:
fn = self._ExecDrbd8DiskOnly
- return fn(feedback_fn)
-
+ result = fn(feedback_fn)
finally:
# Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
_SafeShutdownInstanceDisks(self.lu, self.instance)
- if __debug__:
- # Verify owned locks
- owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
- assert ((self.early_release and not owned_locks) or
- (not self.early_release and
- set(owned_locks) == set(self.node_secondary_ip))), \
- ("Not owning the correct locks, early_release=%s, owned=%r" %
- (self.early_release, owned_locks))
+ if __debug__:
+ # Verify owned locks
+ owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
+ assert ((self.early_release and not owned_locks) or
+ (not self.early_release and
+ set(owned_locks) == set(self.node_secondary_ip))), \
+ ("Not owning the correct locks, early_release=%s, owned=%r" %
+ (self.early_release, owned_locks))
+
+ return result
def _CheckVolumeGroup(self, nodes):
self.lu.LogInfo("Checking volume groups")
(node_name, self.instance.name))
def _CreateNewStorage(self, node_name):
+ """Create new storage on the primary or secondary node.
+
+ This is only used for same-node replaces, not for changing the
+ secondary node, hence we don't want to modify the existing disk.
+
+ """
iv_names = {}
for idx, dev in enumerate(self.instance.disks):
logical_id=(vg_meta, names[1]))
new_lvs = [lv_data, lv_meta]
- old_lvs = dev.children
+ old_lvs = [child.Copy() for child in dev.children]
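+      # work on copies, so that the renames below do not touch the disk
+      # objects stored in the configuration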
iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
# we pass force_create=True to force the LVM creation
rename_new_to_old)
result.Raise("Can't rename new LVs on node %s" % self.target_node)
+      # Intermediate steps of in-memory modifications
for old, new in zip(old_lvs, new_lvs):
new.logical_id = old.logical_id
self.cfg.SetDiskID(new, self.target_node)
+ # We need to modify old_lvs so that removal later removes the
+ # right LVs, not the newly added ones; note that old_lvs is a
+ # copy here
for disk in old_lvs:
disk.logical_id = ren_fn(disk, temp_suffix)
self.cfg.SetDiskID(disk, self.target_node)
"volumes"))
raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
- dev.children = new_lvs
-
- self.cfg.Update(self.instance, feedback_fn)
-
cstep = 5
if self.early_release:
self.lu.LogStep(cstep, steps_total, "Removing old storage")
locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
def Exec(self, feedback_fn):
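+    # collect all instances that have one of the given nodes as their
+    # secondary; if there are none, there is nothing to evacuate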
+ instances = []
+ for node in self.op.nodes:
+ instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
+ if not instances:
+ return []
+
if self.op.remote_node is not None:
- instances = []
- for node in self.op.nodes:
- instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
result = []
for i in instances:
if i.primary_node == self.op.remote_node:
self.be_inst = i_bedict # the new dict (without defaults)
else:
self.be_new = self.be_inst = {}
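+    # the current parameters, with cluster defaults filled in, used
+    # below to detect an actual memory increase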
+ be_old = cluster.FillBE(instance)
# osparams processing
if self.op.osparams:
self.warn = []
- if constants.BE_MEMORY in self.op.beparams and not self.op.force:
+ if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
+ be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
mem_check_list = [pnode]
if be_new[constants.BE_AUTO_BALANCE]:
# either we changed auto_balance to yes or it was from before
for node, nres in nodeinfo.items():
if node not in instance.secondary_nodes:
continue
- msg = nres.fail_msg
- if msg:
- self.warn.append("Can't get info from secondary node %s: %s" %
- (node, msg))
- elif not isinstance(nres.payload.get('memory_free', None), int):
- self.warn.append("Secondary node %s didn't return free"
- " memory information" % node)
+ nres.Raise("Can't get info from secondary node %s" % node,
+ prereq=True, ecode=errors.ECODE_STATE)
+ if not isinstance(nres.payload.get('memory_free', None), int):
+ raise errors.OpPrereqError("Secondary node %s didn't return free"
+ " memory information" % node,
+ errors.ECODE_STATE)
elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
- self.warn.append("Not enough memory to failover instance to"
- " secondary node %s" % node)
+ raise errors.OpPrereqError("This change will prevent the instance"
+ " from failover to its secondary node"
+ " %s, due to not enough memory" % node,
+ errors.ECODE_STATE)
# NIC processing
self.nic_pnew = {}
self.cfg.Update(instance, feedback_fn)
# disks are created, waiting for sync
- disk_abort = not _WaitForSync(self, instance)
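+    # if the user does not want to wait for the sync, do a single
+    # (oneshot) status check instead of polling until completion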
+ disk_abort = not _WaitForSync(self, instance,
+ oneshot=not self.op.wait_for_sync)
if disk_abort:
raise errors.OpExecError("There are some degraded disks for"
" this instance, please cleanup manually")
# We want to lock all the affected nodes and groups. We have readily
# available the list of nodes, and the *destination* group. To gather the
- # list of "source" groups, we need to fetch node information.
- self.node_data = self.cfg.GetAllNodesInfo()
- affected_groups = set(self.node_data[node].group for node in self.op.nodes)
- affected_groups.add(self.group_uuid)
-
+ # list of "source" groups, we need to fetch node information later on.
self.needed_locks = {
- locking.LEVEL_NODEGROUP: list(affected_groups),
+ locking.LEVEL_NODEGROUP: set([self.group_uuid]),
locking.LEVEL_NODE: self.op.nodes,
}
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODEGROUP:
+ assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
+
+ # Try to get all affected nodes' groups without having the group or node
+ # lock yet. Needs verification later in the code flow.
+ groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
+
+ self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
+
def CheckPrereq(self):
"""Check prerequisites.
"""
+ assert self.needed_locks[locking.LEVEL_NODEGROUP]
+ assert (frozenset(self.acquired_locks[locking.LEVEL_NODE]) ==
+ frozenset(self.op.nodes))
+
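+    # verify that the groups computed optimistically in DeclareLocks
+    # still match the current configuration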
+ expected_locks = (set([self.group_uuid]) |
+ self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
+ actual_locks = self.acquired_locks[locking.LEVEL_NODEGROUP]
+ if actual_locks != expected_locks:
+ raise errors.OpExecError("Nodes changed groups since locks were acquired,"
+ " current groups are '%s', used to be '%s'" %
+ (utils.CommaJoin(expected_locks),
+ utils.CommaJoin(actual_locks)))
+
+ self.node_data = self.cfg.GetAllNodesInfo()
self.group = self.cfg.GetNodeGroup(self.group_uuid)
instance_data = self.cfg.GetAllInstancesInfo()
for node in self.op.nodes:
self.node_data[node].group = self.group_uuid
+ # FIXME: Depends on side-effects of modifying the result of
+ # C{cfg.GetAllNodesInfo}
+
self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
@staticmethod