import os
import os.path
-import sha
import time
import tempfile
import re
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
- memory, vcpus, nics):
+ memory, vcpus, nics, disk_template, disks):
"""Builds instance related env variables for hooks
This builds the hook environment from individual variables.
@type nics: list
@param nics: list of tuples (ip, bridge, mac) representing
the NICs the instance has
+ @type disk_template: string
+ @param disk_template: the disk template of the instance
+ @type disks: list
+ @param disks: the list of (size, mode) pairs
@rtype: dict
@return: the hook environment for this instance
"INSTANCE_STATUS": str_status,
"INSTANCE_MEMORY": memory,
"INSTANCE_VCPUS": vcpus,
+ "INSTANCE_DISK_TEMPLATE": disk_template,
}
if nics:
ip = ""
env["INSTANCE_NIC%d_IP" % idx] = ip
env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
- env["INSTANCE_NIC%d_HWADDR" % idx] = mac
+ env["INSTANCE_NIC%d_MAC" % idx] = mac
else:
nic_count = 0
env["INSTANCE_NIC_COUNT"] = nic_count
+ if disks:
+ disk_count = len(disks)
+ for idx, (size, mode) in enumerate(disks):
+ env["INSTANCE_DISK%d_SIZE" % idx] = size
+ env["INSTANCE_DISK%d_MODE" % idx] = mode
+ else:
+ disk_count = 0
+
+ env["INSTANCE_DISK_COUNT"] = disk_count
+
return env
'memory': bep[constants.BE_MEMORY],
'vcpus': bep[constants.BE_VCPUS],
'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
+ 'disk_template': instance.disk_template,
+ 'disks': [(disk.size, disk.mode) for disk in instance.disks],
}
if override:
args.update(override)
def _VerifyNode(self, nodeinfo, file_list, local_cksum,
node_result, feedback_fn, master_files,
- drbd_map):
+ drbd_map, vg_name):
"""Run multiple tests against a node.
Test list:
@param drbd_map: the used drbd minors for this node, in
form of minor: (instance, must_exist) which correspond to instances
and their running status
+ @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
"""
node = nodeinfo.name
(constants.RELEASE_VERSION, node, remote_version[1]))
# checks vg existence and size > 20G
-
- vglist = node_result.get(constants.NV_VGLIST, None)
- if not vglist:
- feedback_fn(" - ERROR: unable to check volume groups on node %s." %
- (node,))
- bad = True
- else:
- vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
- constants.MIN_VG_SIZE)
- if vgstatus:
- feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
+ if vg_name is not None:
+ vglist = node_result.get(constants.NV_VGLIST, None)
+ if not vglist:
+ feedback_fn(" - ERROR: unable to check volume groups on node %s." %
+ (node,))
bad = True
+ else:
+ vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
+ constants.MIN_VG_SIZE)
+ if vgstatus:
+ feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
+ bad = True
# checks config file checksum
(hv_name, hv_result))
# check used drbd list
- used_minors = node_result.get(constants.NV_DRBDLIST, [])
- for minor, (iname, must_exist) in drbd_map.items():
- if minor not in used_minors and must_exist:
- feedback_fn(" - ERROR: drbd minor %d of instance %s is not active" %
- (minor, iname))
- bad = True
- for minor in used_minors:
- if minor not in drbd_map:
- feedback_fn(" - ERROR: unallocated drbd minor %d is in use" % minor)
- bad = True
+ if vg_name is not None:
+ used_minors = node_result.get(constants.NV_DRBDLIST, [])
+ if not isinstance(used_minors, (tuple, list)):
+ feedback_fn(" - ERROR: cannot parse drbd status file: %s" %
+ str(used_minors))
+ else:
+ for minor, (iname, must_exist) in drbd_map.items():
+ if minor not in used_minors and must_exist:
+ feedback_fn(" - ERROR: drbd minor %d of instance %s is"
+ " not active" % (minor, iname))
+ bad = True
+ for minor in used_minors:
+ if minor not in drbd_map:
+ feedback_fn(" - ERROR: unallocated drbd minor %d is in use" %
+ minor)
+ bad = True
return bad
"""
all_nodes = self.cfg.GetNodeList()
- # TODO: populate the environment with useful information for verify hooks
- env = {}
+ env = {
+ "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
+ }
+ for node in self.cfg.GetAllNodesInfo().values():
+ env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
+
return env, [], all_nodes
def Exec(self, feedback_fn):
constants.NV_NODENETTEST: [(node.name, node.primary_ip,
node.secondary_ip) for node in nodeinfo
if not node.offline],
- constants.NV_LVLIST: vg_name,
constants.NV_INSTANCELIST: hypervisors,
- constants.NV_VGLIST: None,
constants.NV_VERSION: None,
constants.NV_HVINFO: self.cfg.GetHypervisorType(),
- constants.NV_DRBDLIST: None,
}
+ if vg_name is not None:
+ node_verify_param[constants.NV_VGLIST] = None
+ node_verify_param[constants.NV_LVLIST] = vg_name
+ node_verify_param[constants.NV_DRBDLIST] = None
all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
self.cfg.GetClusterName())
node_drbd = {}
for minor, instance in all_drbd_map[node].items():
- instance = instanceinfo[instance]
- node_drbd[minor] = (instance.name, instance.admin_up)
+ if instance not in instanceinfo:
+ feedback_fn(" - ERROR: ghost instance '%s' in temporary DRBD map" %
+ instance)
+ # ghost instance should not be running, but otherwise we
+ # don't give double warnings (both ghost instance and
+ # unallocated minor in use)
+ node_drbd[minor] = (instance, False)
+ else:
+ instance = instanceinfo[instance]
+ node_drbd[minor] = (instance.name, instance.admin_up)
result = self._VerifyNode(node_i, file_names, local_checksums,
nresult, feedback_fn, master_files,
- node_drbd)
+ node_drbd, vg_name)
bad = bad or result
lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
- if isinstance(lvdata, basestring):
+ if vg_name is None:
+ node_volume[node] = {}
+ elif isinstance(lvdata, basestring):
feedback_fn(" - ERROR: LVM problem on node %s: %s" %
(node, utils.SafeEncode(lvdata)))
bad = True
try:
node_info[node] = {
"mfree": int(nodeinfo['memory_free']),
- "dfree": int(nresult[constants.NV_VGLIST][vg_name]),
"pinst": [],
"sinst": [],
# dictionary holding all instances this node is secondary for,
# secondary.
"sinst-by-pnode": {},
}
- except ValueError:
- feedback_fn(" - ERROR: invalid value returned from node %s" % (node,))
+ # FIXME: devise a free space model for file based instances as well
+ if vg_name is not None:
+ if (constants.NV_VGLIST not in nresult or
+ vg_name not in nresult[constants.NV_VGLIST]):
+ feedback_fn(" - ERROR: node %s didn't return data for the"
+ " volume group '%s' - it is either missing or broken" %
+ (node, vg_name))
+ bad = True
+ continue
+ node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
+ except (ValueError, KeyError):
+ feedback_fn(" - ERROR: invalid nodeinfo value returned"
+ " from node %s" % (node,))
bad = True
continue
if isinstance(lvs, basestring):
logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
res_nlvm[node] = lvs
+ continue
elif not isinstance(lvs, dict):
logging.warning("Connection to node %s failed or invalid data"
" returned", node)
_OP_REQP = []
REQ_BGL = False
- def CheckParameters(self):
+ def CheckArguments(self):
"""Check parameters
"""
if self.op.candidate_pool_size is not None:
try:
self.op.candidate_pool_size = int(self.op.candidate_pool_size)
- except ValueError, err:
+ except (ValueError, TypeError), err:
raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
str(err))
if self.op.candidate_pool_size < 1:
"""
if self.op.vg_name is not None:
- if self.op.vg_name != self.cfg.GetVGName():
- self.cfg.SetVGName(self.op.vg_name)
+ new_volume = self.op.vg_name
+ if not new_volume:
+ new_volume = None
+ if new_volume != self.cfg.GetVGName():
+ self.cfg.SetVGName(new_volume)
else:
feedback_fn("Cluster LVM configuration already in desired"
" state, not changing")
_AdjustCandidatePool(self)
+def _RedistributeAncillaryFiles(lu, additional_nodes=None):
+  """Distribute additional files which are part of the cluster configuration.
+
+  ConfigWriter takes care of distributing the config and ssconf files, but
+  there are more files which should be distributed to all nodes. This function
+  makes sure those are copied.
+
+  Files which do not exist on the local filesystem are skipped; per-node
+  upload failures are only logged and do not abort the distribution.
+
+  @param lu: calling logical unit
+  @type additional_nodes: list of strings, a single string or None
+  @param additional_nodes: names of nodes not in the config to distribute
+      to; a single node name is also accepted and is treated as a
+      one-element list
+
+  """
+  # 1. Gather target nodes
+  if additional_nodes is None:
+    additional_nodes = []
+  elif isinstance(additional_nodes, basestring):
+    # list.extend() on a string would add one bogus "node" per character,
+    # so wrap a bare node name before merging it into the target list
+    additional_nodes = [additional_nodes]
+
+  master_name = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode()).name
+  # build a new list instead of modifying the one returned by the config;
+  # the master node itself is never a target of the upload
+  dist_nodes = [name
+                for name in lu.cfg.GetNodeList() + list(additional_nodes)
+                if name != master_name]
+
+  # 2. Gather files to distribute
+  dist_files = set([constants.ETC_HOSTS,
+                    constants.SSH_KNOWN_HOSTS_FILE,
+                    constants.RAPI_CERT_FILE,
+                    constants.RAPI_USERS_FILE,
+                   ])
+
+  # every enabled hypervisor contributes its own list of ancillary files
+  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
+  for hv_name in enabled_hypervisors:
+    hv_class = hypervisor.GetHypervisor(hv_name)
+    dist_files.update(hv_class.GetAncillaryFiles())
+
+  # 3. Perform the files upload
+  for fname in dist_files:
+    if not os.path.exists(fname):
+      # file not present locally, nothing to copy
+      continue
+    result = lu.rpc.call_upload_file(dist_nodes, fname)
+    for to_node, to_result in result.items():
+      if to_result.failed or not to_result.data:
+        logging.error("Copy of file %s to node %s failed", fname, to_node)
+
+
class LURedistributeConfig(NoHooksLU):
"""Force the redistribution of cluster configuration.
"""
self.cfg.Update(self.cfg.GetClusterInfo())
+ _RedistributeAncillaryFiles(self)
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
selected=self.op.output_fields)
# Lock all nodes, in shared mode
+ # Temporary removal of locks, should be reverted later
+ # TODO: reintroduce locks when they are lighter-weight
self.needed_locks = {}
- self.share_locks[locking.LEVEL_NODE] = 1
- self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+ #self.share_locks[locking.LEVEL_NODE] = 1
+ #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
def CheckPrereq(self):
"""Check prerequisites.
@param rlist: a map with node names as keys and OS objects as values
@rtype: dict
- @returns: a dictionary with osnames as keys and as value another map, with
+ @return: a dictionary with osnames as keys and as value another map, with
nodes as keys and list of OS objects as values, eg::
{"debian-etch": {"node1": [<object>,...],
"""
all_os = {}
+ # we build here the list of nodes that didn't fail the RPC (at RPC
+ # level), so that nodes with a non-responding node daemon don't
+ # make all OSes invalid
+ good_nodes = [node_name for node_name in rlist
+ if not rlist[node_name].failed]
for node_name, nr in rlist.iteritems():
if nr.failed or not nr.data:
continue
# build a list of nodes for this os containing empty lists
# for each node in node_list
all_os[os_obj.name] = {}
- for nname in node_list:
+ for nname in good_nodes:
all_os[os_obj.name][nname] = []
all_os[os_obj.name][node_name].append(os_obj)
return all_os
"""Compute the list of OSes.
"""
- node_list = self.acquired_locks[locking.LEVEL_NODE]
- valid_nodes = [node for node in self.cfg.GetOnlineNodeList()
- if node in node_list]
+ valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
node_data = self.rpc.call_os_diagnose(valid_nodes)
if node_data == False:
raise errors.OpExecError("Can't gather the list of OSes")
(verifier, result[verifier].data['nodelist'][failed]))
raise errors.OpExecError("ssh/hostname verification failed.")
- # Distribute updated /etc/hosts and known_hosts to all nodes,
- # including the node just added
- myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
- dist_nodes = self.cfg.GetNodeList()
- if not self.op.readd:
- dist_nodes.append(node)
- if myself.name in dist_nodes:
- dist_nodes.remove(myself.name)
-
- logging.debug("Copying hosts and known_hosts to all nodes")
- for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
- result = self.rpc.call_upload_file(dist_nodes, fname)
- for to_node, to_result in result.iteritems():
- if to_result.failed or not to_result.data:
- logging.error("Copy of file %s to node %s failed", fname, to_node)
-
- to_copy = []
- enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
- if constants.HTS_COPY_VNC_PASSWORD.intersection(enabled_hypervisors):
- to_copy.append(constants.VNC_PASSWORD_FILE)
-
- for fname in to_copy:
- result = self.rpc.call_upload_file([node], fname)
- if result[node].failed or not result[node]:
- logging.error("Could not copy file %s to node %s", fname, node)
-
if self.op.readd:
+ _RedistributeAncillaryFiles(self)
self.context.ReaddNode(new_node)
else:
+ _RedistributeAncillaryFiles(self, additional_nodes=node)
self.context.AddNode(new_node)
((node.offline and not self.op.offline == False) or
(node.drained and not self.op.drained == False))):
raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
- " to master_candidate")
+ " to master_candidate" % node.name)
return
for hypervisor in cluster.enabled_hypervisors]),
"beparams": cluster.beparams,
"candidate_pool_size": cluster.candidate_pool_size,
+ "default_bridge": cluster.default_bridge,
+ "master_netdev": cluster.master_netdev,
+ "volume_group_name": cluster.volume_group_name,
+ "file_storage_dir": cluster.file_storage_dir,
}
return result
" (is_primary=True, pass=2): %s",
inst_disk.iv_name, node, msg)
disks_ok = False
- device_info.append((instance.primary_node, inst_disk.iv_name, result.data))
+ device_info.append((instance.primary_node, inst_disk.iv_name,
+ result.payload))
# leave the disks configured for the primary node
# this is a workaround that would be fixed better by
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ # extra beparams
+ self.beparams = getattr(self.op, "beparams", {})
+ if self.beparams:
+ if not isinstance(self.beparams, dict):
+ raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
+ " dict" % (type(self.beparams), ))
+ # fill the beparams dict
+ utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
+ self.op.beparams = self.beparams
+
+ # extra hvparams
+ self.hvparams = getattr(self.op, "hvparams", {})
+ if self.hvparams:
+ if not isinstance(self.hvparams, dict):
+ raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
+ " dict" % (type(self.hvparams), ))
+
+ # check hypervisor parameter syntax (locally)
+ cluster = self.cfg.GetClusterInfo()
+ utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
+ filled_hvp = cluster.FillDict(cluster.hvparams[instance.hypervisor],
+ instance.hvparams)
+ filled_hvp.update(self.hvparams)
+ hv_type = hypervisor.GetHypervisor(instance.hypervisor)
+ hv_type.CheckParameterSyntax(filled_hvp)
+ _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
+ self.op.hvparams = self.hvparams
+
_CheckNodeOnline(self, instance.primary_node)
bep = self.cfg.GetClusterInfo().FillBE(instance)
# check bridges existence
_CheckInstanceBridgesExist(self, instance)
- _CheckNodeFreeMemory(self, instance.primary_node,
- "starting instance %s" % instance.name,
- bep[constants.BE_MEMORY], instance.hypervisor)
+ remote_info = self.rpc.call_instance_info(instance.primary_node,
+ instance.name,
+ instance.hypervisor)
+ remote_info.Raise()
+ if not remote_info.data:
+ _CheckNodeFreeMemory(self, instance.primary_node,
+ "starting instance %s" % instance.name,
+ bep[constants.BE_MEMORY], instance.hypervisor)
def Exec(self, feedback_fn):
"""Start the instance.
"""
instance = self.instance
force = self.op.force
- extra_args = getattr(self.op, "extra_args", "")
self.cfg.MarkInstanceUp(instance.name)
_StartInstanceDisks(self, instance, force)
- result = self.rpc.call_instance_start(node_current, instance, extra_args)
+ result = self.rpc.call_instance_start(node_current, instance,
+ self.hvparams, self.beparams)
msg = result.RemoteFailMsg()
if msg:
_ShutdownInstanceDisks(self, instance)
"""
env = {
"IGNORE_SECONDARIES": self.op.ignore_secondaries,
+ "REBOOT_TYPE": self.op.reboot_type,
}
env.update(_BuildInstanceHookEnvByObject(self, self.instance))
nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
instance = self.instance
ignore_secondaries = self.op.ignore_secondaries
reboot_type = self.op.reboot_type
- extra_args = getattr(self.op, "extra_args", "")
node_current = instance.primary_node
if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
constants.INSTANCE_REBOOT_HARD]:
+ for disk in instance.disks:
+ self.cfg.SetDiskID(disk, node_current)
result = self.rpc.call_instance_reboot(node_current, instance,
- reboot_type, extra_args)
+ reboot_type)
msg = result.RemoteFailMsg()
if msg:
raise errors.OpExecError("Could not reboot instance: %s" % msg)
else:
- if not self.rpc.call_instance_shutdown(node_current, instance):
- raise errors.OpExecError("could not shutdown instance for full reboot")
+ result = self.rpc.call_instance_shutdown(node_current, instance)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not shutdown instance for"
+ " full reboot: %s" % msg)
_ShutdownInstanceDisks(self, instance)
_StartInstanceDisks(self, instance, ignore_secondaries)
- result = self.rpc.call_instance_start(node_current, instance, extra_args)
+ result = self.rpc.call_instance_start(node_current, instance, None, None)
msg = result.RemoteFailMsg()
if msg:
_ShutdownInstanceDisks(self, instance)
node_current = instance.primary_node
self.cfg.MarkInstanceDown(instance.name)
result = self.rpc.call_instance_shutdown(node_current, instance)
- if result.failed or not result.data:
- self.proc.LogWarning("Could not shutdown instance")
+ msg = result.RemoteFailMsg()
+ if msg:
+ self.proc.LogWarning("Could not shutdown instance: %s" % msg)
_ShutdownInstanceDisks(self, instance)
remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
instance.hypervisor)
- if remote_info.failed or remote_info.data:
+ remote_info.Raise()
+ if remote_info.data:
raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
(self.op.instance_name,
instance.primary_node))
_StartInstanceDisks(self, inst, None)
try:
feedback_fn("Running the instance OS create scripts...")
- result = self.rpc.call_instance_os_add(inst.primary_node, inst)
+ result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
msg = result.RemoteFailMsg()
if msg:
raise errors.OpExecError("Could not install OS for instance %s"
instance.name, instance.primary_node)
result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
- if result.failed or not result.data:
+ msg = result.RemoteFailMsg()
+ if msg:
if self.op.ignore_failures:
- feedback_fn("Warning: can't shutdown instance")
+ feedback_fn("Warning: can't shutdown instance: %s" % msg)
else:
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, instance.primary_node))
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, instance.primary_node, msg))
logging.info("Removing block devices for instance %s", instance.name)
_OP_REQP = ["output_fields", "names", "use_locking"]
REQ_BGL = False
_FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
- "admin_state", "admin_ram",
+ "admin_state",
"disk_template", "ip", "mac", "bridge",
"sda_size", "sdb_size", "vcpus", "tags",
"network_port", "beparams",
- "(disk).(size)/([0-9]+)",
- "(disk).(sizes)", "disk_usage",
- "(nic).(mac|ip|bridge)/([0-9]+)",
- "(nic).(macs|ips|bridges)",
- "(disk|nic).(count)",
+ r"(disk)\.(size)/([0-9]+)",
+ r"(disk)\.(sizes)", "disk_usage",
+ r"(nic)\.(mac|ip|bridge)/([0-9]+)",
+ r"(nic)\.(macs|ips|bridges)",
+ r"(disk|nic)\.(count)",
"serial_no", "hypervisor", "hvparams",] +
["hv/%s" % name
for name in constants.HVS_PARAMETERS] +
instance.name, source_node)
result = self.rpc.call_instance_shutdown(source_node, instance)
- if result.failed or not result.data:
+ msg = result.RemoteFailMsg()
+ if msg:
if self.op.ignore_consistency:
self.proc.LogWarning("Could not shutdown instance %s on node %s."
- " Proceeding"
- " anyway. Please make sure node %s is down",
- instance.name, source_node, source_node)
+ " Proceeding anyway. Please make sure node"
+ " %s is down. Error details: %s",
+ instance.name, source_node, source_node, msg)
else:
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, source_node))
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, source_node, msg))
feedback_fn("* deactivating the instance's disks on source node")
if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
raise errors.OpExecError("Can't activate the instance's disks")
feedback_fn("* starting the instance on the target node")
- result = self.rpc.call_instance_start(target_node, instance, None)
+ result = self.rpc.call_instance_start(target_node, instance, None, None)
msg = result.RemoteFailMsg()
if msg:
_ShutdownInstanceDisks(self, instance)
"""
env = _BuildInstanceHookEnvByObject(self, self.instance)
+ env["MIGRATE_LIVE"] = self.op.live
+ env["MIGRATE_CLEANUP"] = self.op.cleanup
nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
return env, nl, nl
continue
msg = info.RemoteFailMsg()
if msg:
- raise errors.OpPrereqError("Hypervisor parameter validation failed:"
- " %s" % msg)
+ raise errors.OpPrereqError("Hypervisor parameter validation"
+ " failed on node %s: %s" % (node, msg))
class LUCreateInstance(LogicalUnit):
"""
env = {
- "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
- "INSTANCE_DISK_SIZE": ",".join(str(d["size"]) for d in self.disks),
- "INSTANCE_ADD_MODE": self.op.mode,
+ "ADD_MODE": self.op.mode,
}
if self.op.mode == constants.INSTANCE_IMPORT:
- env["INSTANCE_SRC_NODE"] = self.op.src_node
- env["INSTANCE_SRC_PATH"] = self.op.src_path
- env["INSTANCE_SRC_IMAGES"] = self.src_images
+ env["SRC_NODE"] = self.op.src_node
+ env["SRC_PATH"] = self.op.src_path
+ env["SRC_IMAGES"] = self.src_images
- env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
+ env.update(_BuildInstanceHookEnv(
+ name=self.op.instance_name,
primary_node=self.op.pnode,
secondary_nodes=self.secondaries,
status=self.op.start,
memory=self.be_full[constants.BE_MEMORY],
vcpus=self.be_full[constants.BE_VCPUS],
nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
+ disk_template=self.op.disk_template,
+ disks=[(d["size"], d["mode"]) for d in self.disks],
))
nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
raise errors.OpPrereqError("Cluster does not support lvm-based"
" instances")
-
if self.op.mode == constants.INSTANCE_IMPORT:
src_node = self.op.src_node
src_path = self.op.src_path
nic_mac_ini = 'nic%d_mac' % idx
nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
+ # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
# ip ping checks (we use the same ip that was resolved in ExpandNames)
if self.op.start and not self.op.ip_check:
raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
raise errors.OpPrereqError("IP %s of instance %s already in use" %
(self.check_ip, self.op.instance_name))
+ #### mac address generation
+ # By generating here the mac address both the allocator and the hooks get
+ # the real final mac address rather than the 'auto' or 'generate' value.
+ # There is a race condition between the generation and the instance object
+ # creation, which means that we know the mac is valid now, but we're not
+ # sure it will be when we actually add the instance. If things go bad
+ # adding the instance will abort because of a duplicate mac, and the
+ # creation job will fail.
+ for nic in self.nics:
+ if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ nic.mac = self.cfg.GenerateMAC()
+
#### allocator run
if self.op.iallocator is not None:
instance = self.op.instance_name
pnode_name = self.pnode.name
- for nic in self.nics:
- if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
- nic.mac = self.cfg.GenerateMAC()
-
ht_kind = self.op.hypervisor
if ht_kind in constants.HTS_REQ_PORT:
network_port = self.cfg.AllocatePort()
if iobj.disk_template != constants.DT_DISKLESS:
if self.op.mode == constants.INSTANCE_CREATE:
feedback_fn("* running the instance OS create scripts...")
- result = self.rpc.call_instance_os_add(pnode_name, iobj)
+ result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
msg = result.RemoteFailMsg()
if msg:
raise errors.OpExecError("Could not add os for instance %s"
self.cfg.Update(iobj)
logging.info("Starting instance %s on node %s", instance, pnode_name)
feedback_fn("* starting instance...")
- result = self.rpc.call_instance_start(pnode_name, iobj, None)
+ result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
msg = result.RemoteFailMsg()
if msg:
raise errors.OpExecError("Could not start instance: %s" % msg)
try:
_CreateSingleBlockDev(self, new_node, instance, new_drbd,
_GetInstanceInfoText(instance), False)
- except errors.BlockDeviceError:
+ except errors.GenericError:
self.cfg.ReleaseDRBDMinors(instance.name)
raise
if not static:
self.cfg.SetDiskID(dev, instance.primary_node)
dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
- msg = dev_pstatus.RemoteFailMsg()
- if msg:
- raise errors.OpExecError("Can't compute disk status for %s: %s" %
- (instance.name, msg))
- dev_pstatus = dev_pstatus.payload
+ if dev_pstatus.offline:
+ dev_pstatus = None
+ else:
+ msg = dev_pstatus.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Can't compute disk status for %s: %s" %
+ (instance.name, msg))
+ dev_pstatus = dev_pstatus.payload
else:
dev_pstatus = None
if snode and not static:
self.cfg.SetDiskID(dev, snode)
dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
- msg = dev_sstatus.RemoteFailMsg()
- if msg:
- raise errors.OpExecError("Can't compute disk status for %s: %s" %
- (instance.name, msg))
- dev_sstatus = dev_sstatus.payload
+ if dev_sstatus.offline:
+ dev_sstatus = None
+ else:
+ msg = dev_sstatus.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Can't compute disk status for %s: %s" %
+ (instance.name, msg))
+ dev_sstatus = dev_sstatus.payload
else:
dev_sstatus = None
# nic_dict should be a dict
nic_ip = nic_dict.get('ip', None)
if nic_ip is not None:
- if nic_ip.lower() == "none":
+ if nic_ip.lower() == constants.VALUE_NONE:
nic_dict['ip'] = None
else:
if not utils.IsValidIP(nic_ip):
raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
- # we can only check None bridges and assign the default one
- nic_bridge = nic_dict.get('bridge', None)
- if nic_bridge is None:
- nic_dict['bridge'] = self.cfg.GetDefBridge()
- # but we can validate MACs
- nic_mac = nic_dict.get('mac', None)
- if nic_mac is not None:
- if self.cfg.IsMacInUse(nic_mac):
- raise errors.OpPrereqError("MAC address %s already in use"
- " in cluster" % nic_mac)
+
+ if nic_op == constants.DDM_ADD:
+ nic_bridge = nic_dict.get('bridge', None)
+ if nic_bridge is None:
+ nic_dict['bridge'] = self.cfg.GetDefBridge()
+ nic_mac = nic_dict.get('mac', None)
+ if nic_mac is None:
+ nic_dict['mac'] = constants.VALUE_AUTO
+
+ if 'mac' in nic_dict:
+ nic_mac = nic_dict['mac']
if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
if not utils.IsValidMac(nic_mac):
raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
+ if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
+ raise errors.OpPrereqError("'auto' is not a valid MAC address when"
+ " modifying an existing nic")
+
if nic_addremove > 1:
raise errors.OpPrereqError("Only one NIC add or remove operation"
" supported at a time")
args['memory'] = self.be_new[constants.BE_MEMORY]
if constants.BE_VCPUS in self.be_new:
args['vcpus'] = self.be_new[constants.BE_VCPUS]
- # FIXME: readd disk/nic changes
+ # TODO: export disk changes. Note: only NIC overrides are computed below;
+ # pending disk modifications are not passed to the hook environment.
+ if self.op.nics:
+ args['nics'] = []
+ nic_override = dict(self.op.nics)
+ for idx, nic in enumerate(self.instance.nics):
+ if idx in nic_override:
+ this_nic_override = nic_override[idx]
+ else:
+ this_nic_override = {}
+ if 'ip' in this_nic_override:
+ ip = this_nic_override['ip']
+ else:
+ ip = nic.ip
+ if 'bridge' in this_nic_override:
+ bridge = this_nic_override['bridge']
+ else:
+ bridge = nic.bridge
+ if 'mac' in this_nic_override:
+ mac = this_nic_override['mac']
+ else:
+ mac = nic.mac
+ args['nics'].append((ip, bridge, mac))
+ if constants.DDM_ADD in nic_override:
+ ip = nic_override[constants.DDM_ADD].get('ip', None)
+ bridge = nic_override[constants.DDM_ADD]['bridge']
+ mac = nic_override[constants.DDM_ADD]['mac']
+ args['nics'].append((ip, bridge, mac))
+ elif constants.DDM_REMOVE in nic_override:
+ del args['nics'][-1]
+
env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
self.warn.append("Can't get info from primary node %s" % pnode)
else:
if not instance_info.failed and instance_info.data:
- current_mem = instance_info.data['memory']
+ current_mem = int(instance_info.data['memory'])
else:
# Assume instance not running
# (there is a slight race condition here, but it's not very probable,
raise errors.OpPrereqError("Invalid NIC index %s, valid values"
" are 0 to %d" %
(nic_op, len(instance.nics)))
- nic_bridge = nic_dict.get('bridge', None)
- if nic_bridge is not None:
+ if 'bridge' in nic_dict:
+ nic_bridge = nic_dict['bridge']
+ if nic_bridge is None:
+ raise errors.OpPrereqError('Cannot set the nic bridge to None')
if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
msg = ("Bridge '%s' doesn't exist on one of"
" the instance nodes" % nic_bridge)
self.warn.append(msg)
else:
raise errors.OpPrereqError(msg)
+ if 'mac' in nic_dict:
+ nic_mac = nic_dict['mac']
+ if nic_mac is None:
+ raise errors.OpPrereqError('Cannot set the nic mac to None')
+ elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ # otherwise generate the mac
+ nic_dict['mac'] = self.cfg.GenerateMAC()
+ else:
+ # or validate/reserve the current one
+ if self.cfg.IsMacInUse(nic_mac):
+ raise errors.OpPrereqError("MAC address %s already in use"
+ " in cluster" % nic_mac)
# DISK processing
if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
del instance.nics[-1]
result.append(("nic.%d" % len(instance.nics), "remove"))
elif nic_op == constants.DDM_ADD:
- # add a new nic
- if 'mac' not in nic_dict:
- mac = constants.VALUE_GENERATE
- else:
- mac = nic_dict['mac']
- if mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
- mac = self.cfg.GenerateMAC()
+ # mac and bridge should be set, by now
+ mac = nic_dict['mac']
+ bridge = nic_dict['bridge']
new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
- bridge=nic_dict.get('bridge', None))
+ bridge=bridge)
instance.nics.append(new_nic)
result.append(("nic.%d" % (len(instance.nics) - 1),
"add:mac=%s,ip=%s,bridge=%s" %
if self.op.shutdown:
# shutdown the instance, but not the disks
result = self.rpc.call_instance_shutdown(src_node, instance)
- result.Raise()
- if not result.data:
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, src_node))
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, src_node, msg))
vgname = self.cfg.GetVGName()
finally:
if self.op.shutdown and instance.admin_up:
- result = self.rpc.call_instance_start(src_node, instance, None)
+ result = self.rpc.call_instance_start(src_node, instance, None, None)
msg = result.RemoteFailMsg()
if msg:
_ShutdownInstanceDisks(self, instance)
cluster_info = cfg.GetClusterInfo()
# cluster data
data = {
- "version": 1,
+ "version": constants.IALLOCATOR_VERSION,
"cluster_name": cfg.GetClusterName(),
"cluster_tags": list(cluster_info.GetTags()),
"enabled_hypervisors": list(cluster_info.enabled_hypervisors),
"disk_template": iinfo.disk_template,
"hypervisor": iinfo.hypervisor,
}
+ pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
+ pir["disks"])
instance_data[iinfo.name] = pir
data["instances"] = instance_data