return {}, [], []
+def _AddHostToEtcHosts(hostname):
+ """Wrapper around utils.SetEtcHostsEntry.
+
+ """
+ hi = utils.HostInfo(name=hostname)
+ utils.SetEtcHostsEntry(constants.ETC_HOSTS, hi.ip, hi.name, [hi.ShortName()])
+
+
def _RemoveHostFromEtcHosts(hostname):
- """Wrapper around utils.RemoteEtcHostsEntry.
+ """Wrapper around utils.RemoveEtcHostsEntry.
"""
hi = utils.HostInfo(name=hostname)
if nics:
nic_count = len(nics)
- for idx, (ip, bridge) in enumerate(nics):
+ for idx, (ip, bridge, mac) in enumerate(nics):
if ip is None:
ip = ""
env["INSTANCE_NIC%d_IP" % idx] = ip
env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
+ env["INSTANCE_NIC%d_HWADDR" % idx] = mac
else:
nic_count = 0
'status': instance.os,
'memory': instance.memory,
'vcpus': instance.vcpus,
- 'nics': [(nic.ip, nic.bridge) for nic in instance.nics],
+ 'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
}
if override:
args.update(override)
if config.ConfigWriter.IsCluster():
raise errors.OpPrereqError("Cluster is already initialised")
+ if self.op.hypervisor_type == constants.HT_XEN_HVM31:
+ if not os.path.exists(constants.VNC_PASSWORD_FILE):
+        raise errors.OpPrereqError("Please prepare the cluster VNC"
+                                   " password file %s" %
+                                   constants.VNC_PASSWORD_FILE)
+
self.hostname = hostname = utils.HostInfo()
if hostname.ip.startswith("127."):
raise errors.OpPrereqError("This host's IP resolves to the private"
- " range (%s). Please fix DNS or /etc/hosts." %
- (hostname.ip,))
+ " range (%s). Please fix DNS or %s." %
+ (hostname.ip, constants.ETC_HOSTS))
self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
- if not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, hostname.ip,
- constants.DEFAULT_NODED_PORT):
+ if not utils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT,
+ source=constants.LOCALHOST_IP_ADDRESS):
raise errors.OpPrereqError("Inconsistency: this host's name resolves"
" to %s,\nbut this ip address does not"
" belong to this host."
raise errors.OpPrereqError("Invalid secondary ip given")
if (secondary_ip and
secondary_ip != hostname.ip and
- (not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, secondary_ip,
- constants.DEFAULT_NODED_PORT))):
- raise errors.OpPrereqError("You gave %s as secondary IP,\n"
- "but it does not belong to this host." %
+ (not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
+ source=constants.LOCALHOST_IP_ADDRESS))):
+ raise errors.OpPrereqError("You gave %s as secondary IP,"
+ " but it does not belong to this host." %
secondary_ip)
self.secondary_ip = secondary_ip
raise errors.OpPrereqError("Invalid mac prefix given '%s'" %
self.op.mac_prefix)
- if self.op.hypervisor_type not in hypervisor.VALID_HTYPES:
+ if self.op.hypervisor_type not in constants.HYPER_TYPES:
raise errors.OpPrereqError("Invalid hypervisor type given '%s'" %
self.op.hypervisor_type)
if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
- raise errors.OpPrereqError("Init.d script '%s' missing or not "
- "executable." % constants.NODE_INITD_SCRIPT)
+ raise errors.OpPrereqError("Init.d script '%s' missing or not"
+ " executable." % constants.NODE_INITD_SCRIPT)
def Exec(self, feedback_fn):
"""Initialize the cluster.
f.close()
sshkey = sshline.split(" ")[1]
- hi = utils.HostInfo(name=hostname.name)
- utils.AddEtcHostsEntry(constants.ETC_HOSTS, hostname.name, hi.ip)
- utils.AddEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName(), hi.ip)
- del hi
+ _AddHostToEtcHosts(hostname.name)
_UpdateKnownHosts(hostname.name, hostname.ip, sshkey)
"""
master = self.sstore.GetMasterNode()
+ if not rpc.call_node_stop_master(master):
+ raise errors.OpExecError("Could not disable the master role")
priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
utils.CreateBackup(priv_key)
utils.CreateBackup(pub_key)
rpc.call_node_leave_cluster(master)
- _RemoveHostFromEtcHosts(master)
class LUVerifyCluster(NoHooksLU):
"""
bad = False
feedback_fn("* Verifying global settings")
- self.cfg.VerifyConfig()
+ for msg in self.cfg.VerifyConfig():
+ feedback_fn(" - ERROR: %s" % msg)
vg_name = self.cfg.GetVGName()
nodelist = utils.NiceSort(self.cfg.GetNodeList())
# node_volume
volumeinfo = all_volumeinfo[node]
- if type(volumeinfo) != dict:
+ if isinstance(volumeinfo, basestring):
+ feedback_fn(" - ERROR: LVM problem on node %s: %s" %
+ (node, volumeinfo[-400:].encode('string_escape')))
+ bad = True
+ node_volume[node] = {}
+ elif not isinstance(volumeinfo, dict):
feedback_fn(" - ERROR: connection to %s failed" % (node,))
bad = True
continue
-
- node_volume[node] = volumeinfo
+ else:
+ node_volume[node] = volumeinfo
# node_instance
nodeinstance = all_instanceinfo[node]
return int(bad)
+class LUVerifyDisks(NoHooksLU):
+ """Verifies the cluster disks status.
+
+ """
+ _OP_REQP = []
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This has no prerequisites.
+
+ """
+ pass
+
+ def Exec(self, feedback_fn):
+ """Verify integrity of cluster disks.
+
+ """
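+    # The four result containers, filled in below:
+    #  - res_nodes: nodes that could not be queried (or sent back bad data)
+    #  - res_nlvm: per-node LVM error messages
+    #  - res_instances: instances with at least one offline logical volume
+    #  - res_missing: instance name -> list of missing (node, volume) pairs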
+ result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}
+
+ vg_name = self.cfg.GetVGName()
+ nodes = utils.NiceSort(self.cfg.GetNodeList())
+ instances = [self.cfg.GetInstanceInfo(name)
+ for name in self.cfg.GetInstanceList()]
+
+ nv_dict = {}
+ for inst in instances:
+ inst_lvs = {}
+ if (inst.status != "up" or
+ inst.disk_template not in constants.DTS_NET_MIRROR):
+ continue
+ inst.MapLVsByNode(inst_lvs)
+ # transform { iname: {node: [vol,],},} to {(node, vol): iname}
+ for node, vol_list in inst_lvs.iteritems():
+ for vol in vol_list:
+ nv_dict[(node, vol)] = inst
+
+ if not nv_dict:
+ return result
+
+ node_lvs = rpc.call_volume_list(nodes, vg_name)
+
+ to_act = set()
+ for node in nodes:
+ # node_volume
+ lvs = node_lvs[node]
+
+ if isinstance(lvs, basestring):
+ logger.Info("error enumerating LVs on node %s: %s" % (node, lvs))
+        res_nlvm[node] = lvs
+        continue
+ elif not isinstance(lvs, dict):
+ logger.Info("connection to node %s failed or invalid data returned" %
+ (node,))
+ res_nodes.append(node)
+ continue
+
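+      # every LV the node reports is popped from nv_dict; whatever is left
+      # over at the end was expected but not found, i.e. is missing; an LV
+      # that exists but is not online flags its owning instance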
+ for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
+ inst = nv_dict.pop((node, lv_name), None)
+ if (not lv_online and inst is not None
+ and inst.name not in res_instances):
+ res_instances.append(inst.name)
+
+ # any leftover items in nv_dict are missing LVs, let's arrange the
+ # data better
+ for key, inst in nv_dict.iteritems():
+ if inst.name not in res_missing:
+ res_missing[inst.name] = []
+ res_missing[inst.name].append(key)
+
+ return result
+
+
class LURenameCluster(LogicalUnit):
"""Rename the cluster.
"""
env = {
- "OP_TARGET": self.op.sstore.GetClusterName(),
+ "OP_TARGET": self.sstore.GetClusterName(),
"NEW_NAME": self.op.name,
}
mn = self.sstore.GetMasterNode()
(fname, to_node))
finally:
if not rpc.call_node_start_master(master):
- logger.Error("Could not re-enable the master role on the master,\n"
- "please restart manually.")
+ logger.Error("Could not re-enable the master role on the master,"
+ " please restart manually.")
def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False):
" new node doesn't have one")
# checks reachablity
- if not utils.TcpPing(utils.HostInfo().name,
- primary_ip,
- constants.DEFAULT_NODED_PORT):
+ if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("Node not reachable by ping")
if not newbie_singlehomed:
# check reachability from my secondary ip to newbie's secondary ip
- if not utils.TcpPing(myself.secondary_ip,
- secondary_ip,
- constants.DEFAULT_NODED_PORT):
- raise errors.OpPrereqError(
- "Node secondary ip not reachable by TCP based ping to noded port")
+ if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
+ source=myself.secondary_ip):
+ raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
+ " based ping to noded port")
self.new_node = objects.Node(name=node,
primary_ip=primary_ip,
secondary_ip=secondary_ip)
+ if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+ if not os.path.exists(constants.VNC_PASSWORD_FILE):
+ raise errors.OpPrereqError("Cluster VNC password file %s missing" %
+ constants.VNC_PASSWORD_FILE)
+
def Exec(self, feedback_fn):
"""Adds the new node to the cluster.
raise errors.OpExecError("Cannot transfer ssh keys to the new node")
# Add node to our /etc/hosts, and add key to known_hosts
- hi = utils.HostInfo(name=new_node.name)
- utils.AddEtcHostsEntry(constants.ETC_HOSTS, new_node.name, hi.ip)
- utils.AddEtcHostsEntry(constants.ETC_HOSTS, hi.ShortName(), hi.ip)
- del hi
+ _AddHostToEtcHosts(new_node.name)
_UpdateKnownHosts(new_node.name, new_node.primary_ip,
self.cfg.GetHostKey())
new_node.secondary_ip,
constants.DEFAULT_NODED_PORT,
10, False):
- raise errors.OpExecError("Node claims it doesn't have the"
- " secondary ip you gave (%s).\n"
- "Please fix and re-run this command." %
- new_node.secondary_ip)
+ raise errors.OpExecError("Node claims it doesn't have the secondary ip"
+ " you gave (%s). Please fix and re-run this"
+ " command." % new_node.secondary_ip)
success, msg = ssh.VerifyNodeHostname(node)
if not success:
raise errors.OpExecError("Node '%s' claims it has a different hostname"
- " than the one the resolver gives: %s.\n"
- "Please fix and re-run this command." %
+ " than the one the resolver gives: %s."
+ " Please fix and re-run this command." %
(node, msg))
# Distribute updated /etc/hosts and known_hosts to all nodes,
dist_nodes.remove(myself.name)
logger.Debug("Copying hosts and known_hosts to all nodes")
- for fname in ("/etc/hosts", constants.SSH_KNOWN_HOSTS_FILE):
+ for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
result = rpc.call_upload_file(dist_nodes, fname)
for to_node in dist_nodes:
if not result[to_node]:
(fname, to_node))
to_copy = ss.GetFileList()
+ if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+ to_copy.append(constants.VNC_PASSWORD_FILE)
for fname in to_copy:
if not ssh.CopyFileToNode(node, fname):
logger.Error("could not copy file %s to node %s" % (fname, node))
if self.old_master == self.new_master:
raise errors.OpPrereqError("This commands must be run on the node"
- " where you want the new master to be.\n"
- "%s is already the master" %
+ " where you want the new master to be."
+ " %s is already the master" %
self.old_master)
def Exec(self, feedback_fn):
if not rpc.call_node_start_master(self.new_master):
logger.Error("could not start the master role on the new master"
" %s, please check" % self.new_master)
- feedback_fn("Error in activating the master IP on the new master,\n"
- "please fix manually.")
+ feedback_fn("Error in activating the master IP on the new master,"
+ " please fix manually.")
"""
device_info = []
disks_ok = True
+ iname = instance.name
+  # With the two-pass mechanism we try to reduce the window of
+  # opportunity for the race condition of switching DRBD to primary
+  # before handshaking occurred, but we do not eliminate it
+
+ # The proper fix would be to wait (with some limits) until the
+ # connection has been made and drbd transitions from WFConnection
+ # into any other network-connected state (Connected, SyncTarget,
+ # SyncSource, etc.)
+
+ # 1st pass, assemble on all nodes in secondary mode
for inst_disk in instance.disks:
- master_result = None
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
cfg.SetDiskID(node_disk, node)
- is_primary = node == instance.primary_node
- result = rpc.call_blockdev_assemble(node, node_disk,
- instance.name, is_primary)
+ result = rpc.call_blockdev_assemble(node, node_disk, iname, False)
if not result:
- logger.Error("could not prepare block device %s on node %s (is_pri"
- "mary=%s)" % (inst_disk.iv_name, node, is_primary))
- if is_primary or not ignore_secondaries:
+ logger.Error("could not prepare block device %s on node %s"
+ " (is_primary=False, pass=1)" % (inst_disk.iv_name, node))
+ if not ignore_secondaries:
disks_ok = False
- if is_primary:
- master_result = result
- device_info.append((instance.primary_node, inst_disk.iv_name,
- master_result))
+
+ # FIXME: race condition on drbd migration to primary
+
+ # 2nd pass, do only the primary node
+ for inst_disk in instance.disks:
+ for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+ if node != instance.primary_node:
+ continue
+ cfg.SetDiskID(node_disk, node)
+ result = rpc.call_blockdev_assemble(node, node_disk, iname, True)
+ if not result:
+ logger.Error("could not prepare block device %s on node %s"
+ " (is_primary=True, pass=2)" % (inst_disk.iv_name, node))
+ disks_ok = False
+ device_info.append((instance.primary_node, inst_disk.iv_name, result))
# leave the disks configured for the primary node
# this is a workaround that would be fixed better by
return result
+def _CheckNodeFreeMemory(cfg, node, reason, requested):
+ """Checks if a node has enough free memory.
+
+  This function checks if a given node has the needed amount of free
+  memory. In case the node has less memory or we cannot get the
+  information from the node, this function raises an OpPrereqError
+  exception.
+
+ Args:
+ - cfg: a ConfigWriter instance
+ - node: the node name
+ - reason: string to use in the error message
+ - requested: the amount of memory in MiB
+
+ """
+ nodeinfo = rpc.call_node_info([node], cfg.GetVGName())
+ if not nodeinfo or not isinstance(nodeinfo, dict):
+ raise errors.OpPrereqError("Could not contact node %s for resource"
+ " information" % (node,))
+
+ free_mem = nodeinfo[node].get('memory_free')
+ if not isinstance(free_mem, int):
+ raise errors.OpPrereqError("Can't compute free memory on node %s, result"
+ " was '%s'" % (node, free_mem))
+ if requested > free_mem:
+ raise errors.OpPrereqError("Not enough memory on node %s for %s:"
+ " needed %s MiB, available %s MiB" %
+ (node, reason, requested, free_mem))
+
+
class LUStartupInstance(LogicalUnit):
"""Starts an instance.
# check bridges existance
_CheckInstanceBridgesExist(instance)
+ _CheckNodeFreeMemory(self.cfg, instance.primary_node,
+ "starting instance %s" % instance.name,
+ instance.memory)
+
self.instance = instance
self.op.instance_name = instance.name
node_current = instance.primary_node
- nodeinfo = rpc.call_node_info([node_current], self.cfg.GetVGName())
- if not nodeinfo:
- raise errors.OpExecError("Could not contact node %s for infos" %
- (node_current))
-
- freememory = nodeinfo[node_current]['memory_free']
- memory = instance.memory
- if memory > freememory:
- raise errors.OpExecError("Not enough memory to start instance"
- " %s on node %s"
- " needed %s MiB, available %s MiB" %
- (instance.name, node_current, memory,
- freememory))
-
_StartInstanceDisks(self.cfg, instance, force)
if not rpc.call_instance_start(node_current, instance, extra_args):
try:
feedback_fn("Running the instance OS create scripts...")
if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"):
- raise errors.OpExecError("Could not install OS for instance %s "
- "on node %s" %
+ raise errors.OpExecError("Could not install OS for instance %s"
+ " on node %s" %
(inst.name, inst.primary_node))
finally:
_ShutdownInstanceDisks(inst, self.cfg)
name_info = utils.HostInfo(self.op.new_name)
self.op.new_name = new_name = name_info.name
+ instance_list = self.cfg.GetInstanceList()
+ if new_name in instance_list:
+ raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
+                                 new_name)
+
if not getattr(self.op, "ignore_ip", False):
command = ["fping", "-q", name_info.ip]
result = utils.RunCmd(command)
try:
if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
"sda", "sdb"):
- msg = ("Could run OS rename script for instance %s\n"
- "on node %s\n"
- "(but the instance has been renamed in Ganeti)" %
+      msg = ("Could not run OS rename script for instance %s on node %s"
+             " (but the instance has been renamed in Ganeti)" %
(inst.name, inst.primary_node))
logger.Error(msg)
finally:
This checks that the fields required are valid output fields.
"""
- self.dynamic_fields = frozenset(["oper_state", "oper_ram"])
+ self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
_CheckOutputFields(static=["name", "os", "pnode", "snodes",
"admin_state", "admin_ram",
"disk_template", "ip", "mac", "bridge",
- "sda_size", "sdb_size"],
+ "sda_size", "sdb_size", "vcpus"],
dynamic=self.dynamic_fields,
selected=self.op.output_fields)
val = None
else:
val = bool(live_data.get(instance.name))
+ elif field == "status":
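+          # possible values: ERROR_nodedown (primary node unreachable),
+          # running, ERROR_up (running but marked down), ERROR_down
+          # (stopped but marked up) and ADMIN_down (stopped as requested)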
+ if instance.primary_node in bad_nodes:
+ val = "ERROR_nodedown"
+ else:
+ running = bool(live_data.get(instance.name))
+ if running:
+ if instance.status != "down":
+ val = "running"
+ else:
+ val = "ERROR_up"
+ else:
+ if instance.status != "down":
+ val = "ERROR_down"
+ else:
+ val = "ADMIN_down"
elif field == "admin_ram":
val = instance.memory
elif field == "oper_ram":
val = None
else:
val = disk.size
+ elif field == "vcpus":
+ val = instance.vcpus
else:
raise errors.ParameterError(field)
iout.append(val)
raise errors.ProgrammerError("no secondary node but using "
"DT_REMOTE_RAID1 template")
- # check memory requirements on the secondary node
target_node = secondary_nodes[0]
- nodeinfo = rpc.call_node_info([target_node], self.cfg.GetVGName())
- info = nodeinfo.get(target_node, None)
- if not info:
- raise errors.OpPrereqError("Cannot get current information"
- " from node '%s'" % nodeinfo)
- if instance.memory > info['memory_free']:
- raise errors.OpPrereqError("Not enough memory on target node %s."
- " %d MB available, %d MB required" %
- (target_node, info['memory_free'],
- instance.memory))
+ # check memory requirements on the secondary node
+ _CheckNodeFreeMemory(self.cfg, target_node, "failing over instance %s" %
+ instance.name, instance.memory)
# check bridge existance
brlist = [nic.bridge for nic in instance.nics]
raise errors.OpExecError("Disk %s is degraded on target node,"
" aborting failover." % dev.iv_name)
- feedback_fn("* checking target node resource availability")
- nodeinfo = rpc.call_node_info([target_node], self.cfg.GetVGName())
-
- if not nodeinfo:
- raise errors.OpExecError("Could not contact target node %s." %
- target_node)
-
- free_memory = int(nodeinfo[target_node]['memory_free'])
- memory = instance.memory
- if memory > free_memory:
- raise errors.OpExecError("Not enough memory to create instance %s on"
- " node %s. needed %s MiB, available %s MiB" %
- (instance.name, target_node, memory,
- free_memory))
-
feedback_fn("* shutting down instance on source node")
logger.Info("Shutting down instance %s on node %s" %
(instance.name, source_node))
#TODO: compute space requirements
vgname = cfg.GetVGName()
- if template_name == "diskless":
+ if template_name == constants.DT_DISKLESS:
disks = []
- elif template_name == "plain":
+ elif template_name == constants.DT_PLAIN:
if len(secondary_nodes) != 0:
raise errors.ProgrammerError("Wrong template configuration")
logical_id=(vgname, names[1]),
iv_name = "sdb")
disks = [sda_dev, sdb_dev]
- elif template_name == "local_raid1":
+ elif template_name == constants.DT_LOCAL_RAID1:
if len(secondary_nodes) != 0:
raise errors.ProgrammerError("Wrong template configuration")
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = ["instance_name", "mem_size", "disk_size", "pnode",
"disk_template", "swap_size", "mode", "start", "vcpus",
- "wait_for_sync", "ip_check"]
+ "wait_for_sync", "ip_check", "mac"]
def BuildHooksEnv(self):
"""Build hooks env.
os_type=self.op.os_type,
memory=self.op.mem_size,
vcpus=self.op.vcpus,
- nics=[(self.inst_ip, self.op.bridge)],
+ nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
))
nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
"""Check prerequisites.
"""
+ for attr in ["kernel_path", "initrd_path", "hvm_boot_order"]:
+ if not hasattr(self.op, attr):
+ setattr(self.op, attr, None)
+
if self.op.mode not in (constants.INSTANCE_CREATE,
constants.INSTANCE_IMPORT):
raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
" the primary node.")
self.secondaries.append(snode_name)
- # Check lv size requirements
- nodenames = [pnode.name] + self.secondaries
- nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
-
# Required free disk space as a function of disk and swap space
req_size_dict = {
- constants.DT_DISKLESS: 0,
+ constants.DT_DISKLESS: None,
constants.DT_PLAIN: self.op.disk_size + self.op.swap_size,
constants.DT_LOCAL_RAID1: (self.op.disk_size + self.op.swap_size) * 2,
# 256 MB are added for drbd metadata, 128MB for each drbd device
req_size = req_size_dict[self.op.disk_template]
- for node in nodenames:
- info = nodeinfo.get(node, None)
- if not info:
- raise errors.OpPrereqError("Cannot get current information"
- " from node '%s'" % nodeinfo)
- if req_size > info['vg_free']:
- raise errors.OpPrereqError("Not enough disk space on target node %s."
- " %d MB available, %d MB required" %
- (node, info['vg_free'], req_size))
+ # Check lv size requirements
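+    # (req_size is None for the diskless template, which needs no LV space)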
+ if req_size is not None:
+ nodenames = [pnode.name] + self.secondaries
+ nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
+ for node in nodenames:
+ info = nodeinfo.get(node, None)
+ if not info:
+          raise errors.OpPrereqError("Cannot get current information"
+                                     " from node '%s'" % node)
+ vg_free = info.get('vg_free', None)
+ if not isinstance(vg_free, int):
+ raise errors.OpPrereqError("Can't compute free disk space on"
+ " node %s" % node)
+        if req_size > vg_free:
+          raise errors.OpPrereqError("Not enough disk space on target node %s."
+                                     " %d MB available, %d MB required" %
+                                     (node, vg_free, req_size))
# os verification
os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
raise errors.OpPrereqError("OS '%s' not in supported os list for"
" primary node" % self.op.os_type)
+ if self.op.kernel_path == constants.VALUE_NONE:
+ raise errors.OpPrereqError("Can't set instance kernel to none")
+
# instance verification
hostname1 = utils.HostInfo(self.op.instance_name)
" adding an instance in start mode")
if self.op.ip_check:
- if utils.TcpPing(utils.HostInfo().name, hostname1.ip,
- constants.DEFAULT_NODED_PORT):
+ if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("IP %s of instance %s already in use" %
(hostname1.ip, instance_name))
+ # MAC address verification
+ if self.op.mac != "auto":
+ if not utils.IsValidMac(self.op.mac.lower()):
+ raise errors.OpPrereqError("invalid MAC address specified: %s" %
+ self.op.mac)
+
# bridge verification
bridge = getattr(self.op, "bridge", None)
if bridge is None:
" destination node '%s'" %
(self.op.bridge, pnode.name))
+ # boot order verification
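+    # (accepted HVM boot letters: a=floppy, c=hard disk, d=cdrom, n=network)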
+ if self.op.hvm_boot_order is not None:
+ if len(self.op.hvm_boot_order.strip("acdn")) != 0:
+ raise errors.OpPrereqError("invalid boot order specified,"
+ " must be one or more of [acdn]")
+
if self.op.start:
self.instance_status = 'up'
else:
instance = self.op.instance_name
pnode_name = self.pnode.name
- nic = objects.NIC(bridge=self.op.bridge, mac=self.cfg.GenerateMAC())
+ if self.op.mac == "auto":
+ mac_address = self.cfg.GenerateMAC()
+ else:
+ mac_address = self.op.mac
+
+ nic = objects.NIC(bridge=self.op.bridge, mac=mac_address)
if self.inst_ip is not None:
nic.ip = self.inst_ip
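+    # hypervisors listed in HTS_REQ_PORT (e.g. HVM, for its VNC console)
+    # need a cluster-wide unique TCP port reserved for this instance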
+ ht_kind = self.sstore.GetHypervisorType()
+ if ht_kind in constants.HTS_REQ_PORT:
+ network_port = self.cfg.AllocatePort()
+ else:
+ network_port = None
+
disks = _GenerateDiskTemplate(self.cfg,
self.op.disk_template,
instance, pnode_name,
nics=[nic], disks=disks,
disk_template=self.op.disk_template,
status=self.instance_status,
+ network_port=network_port,
+ kernel_path=self.op.kernel_path,
+ initrd_path=self.op.initrd_path,
+ hvm_boot_order=self.op.hvm_boot_order,
)
feedback_fn("* creating instance disks...")
logger.Debug("connecting to console of %s on %s" % (instance.name, node))
hyper = hypervisor.GetHypervisor()
- console_cmd = hyper.GetShellCommandForConsole(instance.name)
+ console_cmd = hyper.GetShellCommandForConsole(instance)
# build ssh cmdline
argv = ["ssh", "-q", "-t"]
argv.extend(ssh.KNOWN_HOSTS_OPTS)
raise errors.OpPrereqError("Can't find this device ('%s') in the"
" instance." % self.op.disk_name)
if len(disk.children) > 1:
- raise errors.OpPrereqError("The device already has two slave"
- " devices.\n"
- "This would create a 3-disk raid1"
- " which we don't allow.")
+ raise errors.OpPrereqError("The device already has two slave devices."
+ " This would create a 3-disk raid1 which we"
+ " don't allow.")
self.disk = disk
def Exec(self, feedback_fn):
raise errors.OpPrereqError("Instance '%s' not known" %
self.op.instance_name)
self.instance = instance
+ self.op.instance_name = instance.name
if instance.disk_template not in constants.DTS_NET_MIRROR:
raise errors.OpPrereqError("Instance's disk layout is not"
raise errors.OpPrereqError("Template 'remote_raid1' only allows all"
" disks replacement, not individual ones")
if instance.disk_template == constants.DT_DRBD8:
+ if (self.op.mode == constants.REPLACE_DISK_ALL and
+ remote_node is not None):
+ # switch to replace secondary mode
+ self.op.mode = constants.REPLACE_DISK_SEC
+
if self.op.mode == constants.REPLACE_DISK_ALL:
- raise errors.OpPrereqError("Template 'drbd8' only allows primary or"
+ raise errors.OpPrereqError("Template 'drbd' only allows primary or"
" secondary disk replacement, not"
" both at once")
elif self.op.mode == constants.REPLACE_DISK_PRI:
if remote_node is not None:
- raise errors.OpPrereqError("Template 'drbd8' does not allow changing"
+ raise errors.OpPrereqError("Template 'drbd' does not allow changing"
" the secondary while doing a primary"
" node disk replacement")
self.tgt_node = instance.primary_node
if not _CreateBlockDevOnSecondary(cfg, remote_node, instance,
new_drbd, False,
_GetInstanceInfoText(instance)):
- raise errors.OpExecError("Failed to create new component on"
- " secondary node %s\n"
- "Full abort, cleanup manually!" %
+ raise errors.OpExecError("Failed to create new component on secondary"
+ " node %s. Full abort, cleanup manually!" %
remote_node)
logger.Info("adding new mirror component on primary")
# remove secondary dev
cfg.SetDiskID(new_drbd, remote_node)
rpc.call_blockdev_remove(remote_node, new_drbd)
- raise errors.OpExecError("Failed to create volume on primary!\n"
- "Full abort, cleanup manually!!")
+ raise errors.OpExecError("Failed to create volume on primary!"
+ " Full abort, cleanup manually!!")
# the device exists now
# call the primary node to add the mirror to md
# ok, we created the new LVs, so now we know we have the needed
# storage; as such, we proceed on the target node to rename
# old_lv to _old, and new_lv to old_lv; note that we rename LVs
- # using the assumption than logical_id == physical_id (which in
+ # using the assumption that logical_id == physical_id (which in
# turn is the unique_id on that node)
# FIXME(iustin): use a better name for the replaced LVs
if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
for new_lv in new_lvs:
if not rpc.call_blockdev_remove(tgt_node, new_lv):
- warning("Can't rollback device %s", "manually cleanup unused"
+ warning("Can't rollback device %s", hint="manually cleanup unused"
" logical volumes")
raise errors.OpExecError("Can't add local storage to drbd")
for lv in old_lvs:
cfg.SetDiskID(lv, tgt_node)
if not rpc.call_blockdev_remove(tgt_node, lv):
- warning("Can't remove old LV", "manually remove unused LVs")
+ warning("Can't remove old LV", hint="manually remove unused LVs")
continue
def _ExecD8Secondary(self, feedback_fn):
cfg.SetDiskID(dev, old_node)
if not rpc.call_blockdev_shutdown(old_node, dev):
warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
- "Please cleanup this device manuall as soon as possible")
+ hint="Please cleanup this device manually as soon as possible")
- # we have new storage, we 'rename' the network on the primary
- info("switching primary drbd for %s to new secondary node" % dev.iv_name)
+ info("detaching primary drbds from the network (=> standalone)")
+ done = 0
+ for dev in instance.disks:
cfg.SetDiskID(dev, pri_node)
- # rename to the ip of the new node
- new_uid = list(dev.physical_id)
- new_uid[2] = self.remote_node_info.secondary_ip
- rlist = [(dev, tuple(new_uid))]
- if not rpc.call_blockdev_rename(pri_node, rlist):
- raise errors.OpExecError("Can't detach & re-attach drbd %s on node"
- " %s from %s to %s" %
- (dev.iv_name, pri_node, old_node, new_node))
- dev.logical_id = (pri_node, new_node, dev.logical_id[2])
+ # set the physical (unique in bdev terms) id to None, meaning
+ # detach from network
+ dev.physical_id = (None,) * len(dev.physical_id)
+ # and 'find' the device, which will 'fix' it to match the
+ # standalone state
+ if rpc.call_blockdev_find(pri_node, dev):
+ done += 1
+ else:
+ warning("Failed to detach drbd %s from network, unusual case" %
+ dev.iv_name)
+
+ if not done:
+ # no detaches succeeded (very unlikely)
+ raise errors.OpExecError("Can't detach at least one DRBD from old node")
+
+ # if we managed to detach at least one, we update all the disks of
+ # the instance to point to the new secondary
+ info("updating instance configuration")
+ for dev in instance.disks:
+ dev.logical_id = (pri_node, new_node) + dev.logical_id[2:]
cfg.SetDiskID(dev, pri_node)
- cfg.Update(instance)
+ cfg.Update(instance)
+ # and now perform the drbd attach
+ info("attaching primary drbds to new secondary (standalone => connected)")
+ failures = []
+ for dev in instance.disks:
+ info("attaching primary drbd for %s to new secondary node" % dev.iv_name)
+ # since the attach is smart, it's enough to 'find' the device,
+ # it will automatically activate the network, if the physical_id
+ # is correct
+ cfg.SetDiskID(dev, pri_node)
+ if not rpc.call_blockdev_find(pri_node, dev):
+ warning("can't attach drbd %s to new secondary!" % dev.iv_name,
+ "please do a gnt-instance info to see the status of disks")
# this can fail as the old devices are degraded and _WaitForSync
# does a combined result over all disks, so we don't check its
cfg.SetDiskID(lv, old_node)
if not rpc.call_blockdev_remove(old_node, lv):
warning("Can't remove LV on old secondary",
- "Cleanup stale volumes by hand")
+ hint="Cleanup stale volumes by hand")
def Exec(self, feedback_fn):
"""Execute disk replacement.
instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name))
if instance is None:
raise errors.OpPrereqError("No such instance name '%s'" % name)
- self.wanted_instances.append(instance)
+ self.wanted_instances.append(instance)
else:
self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
in self.cfg.GetInstanceList()]
"memory": instance.memory,
"nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
"disks": disks,
+ "network_port": instance.network_port,
"vcpus": instance.vcpus,
+ "kernel_path": instance.kernel_path,
+ "initrd_path": instance.initrd_path,
+ "hvm_boot_order": instance.hvm_boot_order,
}
result[instance.name] = idict
args['memory'] = self.mem
if self.vcpus:
args['vcpus'] = self.vcpus
- if self.do_ip or self.do_bridge:
+ if self.do_ip or self.do_bridge or self.mac:
if self.do_ip:
ip = self.ip
else:
bridge = self.bridge
else:
bridge = self.instance.nics[0].bridge
- args['nics'] = [(ip, bridge)]
+ if self.mac:
+ mac = self.mac
+ else:
+ mac = self.instance.nics[0].mac
+ args['nics'] = [(ip, bridge, mac)]
env = _BuildInstanceHookEnvByObject(self.instance, override=args)
nl = [self.sstore.GetMasterNode(),
self.instance.primary_node] + list(self.instance.secondary_nodes)
self.mem = getattr(self.op, "mem", None)
self.vcpus = getattr(self.op, "vcpus", None)
self.ip = getattr(self.op, "ip", None)
+ self.mac = getattr(self.op, "mac", None)
self.bridge = getattr(self.op, "bridge", None)
- if [self.mem, self.vcpus, self.ip, self.bridge].count(None) == 4:
+ self.kernel_path = getattr(self.op, "kernel_path", None)
+ self.initrd_path = getattr(self.op, "initrd_path", None)
+ self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
+ all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
+ self.kernel_path, self.initrd_path, self.hvm_boot_order]
+ if all_parms.count(None) == len(all_parms):
raise errors.OpPrereqError("No changes submitted")
if self.mem is not None:
try:
else:
self.do_ip = False
self.do_bridge = (self.bridge is not None)
+ if self.mac is not None:
+ if self.cfg.IsMacInUse(self.mac):
+        raise errors.OpPrereqError("MAC address %s already in use in cluster" %
+                                   self.mac)
+      if not utils.IsValidMac(self.mac):
+        raise errors.OpPrereqError("Invalid MAC address %s" % self.mac)
+
+ if self.kernel_path is not None:
+ self.do_kernel_path = True
+ if self.kernel_path == constants.VALUE_NONE:
+ raise errors.OpPrereqError("Can't set instance to no kernel")
+
+ if self.kernel_path != constants.VALUE_DEFAULT:
+ if not os.path.isabs(self.kernel_path):
+ raise errors.OpPrereqError("The kernel path must be an absolute"
+ " filename")
+ else:
+ self.do_kernel_path = False
+
+ if self.initrd_path is not None:
+ self.do_initrd_path = True
+ if self.initrd_path not in (constants.VALUE_NONE,
+ constants.VALUE_DEFAULT):
+ if not os.path.isabs(self.initrd_path):
+ raise errors.OpPrereqError("The initrd path must be an absolute"
+ " filename")
+ else:
+ self.do_initrd_path = False
+
+ # boot order verification
+ if self.hvm_boot_order is not None:
+ if self.hvm_boot_order != constants.VALUE_DEFAULT:
+ if len(self.hvm_boot_order.strip("acdn")) != 0:
+ raise errors.OpPrereqError("invalid boot order specified,"
+ " must be one or more of [acdn]"
+ " or 'default'")
instance = self.cfg.GetInstanceInfo(
self.cfg.ExpandInstanceName(self.op.instance_name))
if self.bridge:
instance.nics[0].bridge = self.bridge
result.append(("bridge", self.bridge))
+ if self.mac:
+ instance.nics[0].mac = self.mac
+ result.append(("mac", self.mac))
+ if self.do_kernel_path:
+ instance.kernel_path = self.kernel_path
+ result.append(("kernel_path", self.kernel_path))
+ if self.do_initrd_path:
+ instance.initrd_path = self.initrd_path
+ result.append(("initrd_path", self.initrd_path))
+ if self.hvm_boot_order:
+ if self.hvm_boot_order == constants.VALUE_DEFAULT:
+ instance.hvm_boot_order = None
+ else:
+ instance.hvm_boot_order = self.hvm_boot_order
+ result.append(("hvm_boot_order", self.hvm_boot_order))
self.cfg.AddInstance(instance)
raise errors.OpRetryError("There has been a modification to the"
" config file and the operation has been"
" aborted. Please retry.")
+
+class LUTestDelay(NoHooksLU):
+ """Sleep for a specified amount of time.
+
+  This LU sleeps on the master and/or nodes for a specified amount of
+ time.
+
+ """
+ _OP_REQP = ["duration", "on_master", "on_nodes"]
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that we have a good list of nodes and/or the duration
+ is valid.
+
+ """
+
+ if self.op.on_nodes:
+ self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
+
+ def Exec(self, feedback_fn):
+ """Do the actual sleep.
+
+ """
+ if self.op.on_master:
+ if not utils.TestDelay(self.op.duration):
+ raise errors.OpExecError("Error during master delay test")
+ if self.op.on_nodes:
+ result = rpc.call_test_delay(self.op.on_nodes, self.op.duration)
+ if not result:
+ raise errors.OpExecError("Complete failure from rpc call")
+ for node, node_result in result.items():
+ if not node_result:
+ raise errors.OpExecError("Failure during rpc call to node %s,"
+ " result: %s" % (node, node_result))