if hostname.ip.startswith("127."):
raise errors.OpPrereqError("This host's IP resolves to the private"
- " range (%s). Please fix DNS or /etc/hosts." %
- (hostname.ip,))
+ " range (%s). Please fix DNS or %s." %
+ (hostname.ip, constants.ETC_HOSTS))
self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
- if not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, hostname.ip,
- constants.DEFAULT_NODED_PORT):
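+ # verify the resolved address really belongs to this host: ping it on
+ # the noded port, sourcing from the localhost address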
+ if not utils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT,
+ source=constants.LOCALHOST_IP_ADDRESS):
raise errors.OpPrereqError("Inconsistency: this host's name resolves"
" to %s,\nbut this ip address does not"
" belong to this host."
raise errors.OpPrereqError("Invalid secondary ip given")
if (secondary_ip and
secondary_ip != hostname.ip and
- (not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, secondary_ip,
- constants.DEFAULT_NODED_PORT))):
+ (not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
+ source=constants.LOCALHOST_IP_ADDRESS))):
raise errors.OpPrereqError("You gave %s as secondary IP,"
" but it does not belong to this host." %
secondary_ip)
"""
master = self.sstore.GetMasterNode()
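+ # deactivate the master role on the current master node before
+ # touching the cluster's ssh keys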
+ if not rpc.call_node_stop_master(master):
+ raise errors.OpExecError("Could not disable the master role")
priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
utils.CreateBackup(priv_key)
utils.CreateBackup(pub_key)
inst = nv_dict.pop((node, lv_name), None)
if (not lv_online and inst is not None
and inst.name not in res_instances):
- res_instances.append(inst.name)
+ res_instances.append(inst.name)
# any leftover items in nv_dict are missing LVs, let's arrange the
# data better
"""
env = {
- "OP_TARGET": self.op.sstore.GetClusterName(),
+ "OP_TARGET": self.sstore.GetClusterName(),
"NEW_NAME": self.op.name,
}
mn = self.sstore.GetMasterNode()
" new node doesn't have one")
# checks reachability
- if not utils.TcpPing(utils.HostInfo().name,
- primary_ip,
- constants.DEFAULT_NODED_PORT):
+ if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("Node not reachable by ping")
if not newbie_singlehomed:
# check reachability from my secondary ip to newbie's secondary ip
- if not utils.TcpPing(myself.secondary_ip,
- secondary_ip,
- constants.DEFAULT_NODED_PORT):
+ if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
+ source=myself.secondary_ip):
raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
" based ping to noded port")
dist_nodes.remove(myself.name)
logger.Debug("Copying hosts and known_hosts to all nodes")
- for fname in ("/etc/hosts", constants.SSH_KNOWN_HOSTS_FILE):
+ for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
result = rpc.call_upload_file(dist_nodes, fname)
for to_node in dist_nodes:
if not result[to_node]:
"""
device_info = []
disks_ok = True
+ iname = instance.name
+ # With the two-pass mechanism we try to reduce the window of
+ # opportunity for the race condition of switching DRBD to primary
+ # before handshaking occurred, but we do not eliminate it
+
+ # The proper fix would be to wait (with some limits) until the
+ # connection has been made and drbd transitions from WFConnection
+ # into any other network-connected state (Connected, SyncTarget,
+ # SyncSource, etc.)
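+
+ # (a hypothetical sketch of that fix: after the first pass, poll the
+ # connection state on the primary, e.g. via /proc/drbd, until it
+ # leaves WFConnection or a retry limit is hit, then run the second pass)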
+
+ # 1st pass, assemble on all nodes in secondary mode
for inst_disk in instance.disks:
- master_result = None
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
cfg.SetDiskID(node_disk, node)
- is_primary = node == instance.primary_node
- result = rpc.call_blockdev_assemble(node, node_disk,
- instance.name, is_primary)
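+ # the trailing boolean is the is_primary flag: every node, including
+ # the primary, first assembles the device as a secondary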
+ result = rpc.call_blockdev_assemble(node, node_disk, iname, False)
if not result:
logger.Error("could not prepare block device %s on node %s"
- " (is_primary=%s)" %
- (inst_disk.iv_name, node, is_primary))
- if is_primary or not ignore_secondaries:
+ " (is_primary=False, pass=1)" % (inst_disk.iv_name, node))
+ if not ignore_secondaries:
disks_ok = False
- if is_primary:
- master_result = result
- device_info.append((instance.primary_node, inst_disk.iv_name,
- master_result))
+
+ # FIXME: race condition on drbd migration to primary
+
+ # 2nd pass, do only the primary node
+ for inst_disk in instance.disks:
+ for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+ if node != instance.primary_node:
+ continue
+ cfg.SetDiskID(node_disk, node)
+ result = rpc.call_blockdev_assemble(node, node_disk, iname, True)
+ if not result:
+ logger.Error("could not prepare block device %s on node %s"
+ " (is_primary=True, pass=2)" % (inst_disk.iv_name, node))
+ disks_ok = False
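+ # record the assembly result returned by the primary node for each disk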
+ device_info.append((instance.primary_node, inst_disk.iv_name, result))
# leave the disks configured for the primary node
# this is a workaround that would be fixed better by
name_info = utils.HostInfo(self.op.new_name)
self.op.new_name = new_name = name_info.name
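+ # refuse the rename if an instance with the new name already exists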
+ instance_list = self.cfg.GetInstanceList()
+ if new_name in instance_list:
+ raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
+ new_name)
+
if not getattr(self.op, "ignore_ip", False):
command = ["fping", "-q", name_info.ip]
result = utils.RunCmd(command)
This checks that the fields required are valid output fields.
"""
- self.dynamic_fields = frozenset(["oper_state", "oper_ram"])
+ self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
_CheckOutputFields(static=["name", "os", "pnode", "snodes",
"admin_state", "admin_ram",
"disk_template", "ip", "mac", "bridge",
val = None
else:
val = bool(live_data.get(instance.name))
+ elif field == "status":
+ if instance.primary_node in bad_nodes:
+ val = "ERROR_nodedown"
+ else:
+ running = bool(live_data.get(instance.name))
+ if running:
+ if instance.status != "down":
+ val = "running"
+ else:
+ val = "ERROR_up"
+ else:
+ if instance.status != "down":
+ val = "ERROR_down"
+ else:
+ val = "ADMIN_down"
elif field == "admin_ram":
val = instance.memory
elif field == "oper_ram":
#TODO: compute space requirements
vgname = cfg.GetVGName()
- if template_name == "diskless":
+ if template_name == constants.DT_DISKLESS:
disks = []
- elif template_name == "plain":
+ elif template_name == constants.DT_PLAIN:
if len(secondary_nodes) != 0:
raise errors.ProgrammerError("Wrong template configuration")
logical_id=(vgname, names[1]),
iv_name = "sdb")
disks = [sda_dev, sdb_dev]
- elif template_name == "local_raid1":
+ elif template_name == constants.DT_LOCAL_RAID1:
if len(secondary_nodes) != 0:
raise errors.ProgrammerError("Wrong template configuration")
os_type=self.op.os_type,
memory=self.op.mem_size,
vcpus=self.op.vcpus,
- nics=[(self.inst_ip, self.op.bridge)],
+ nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
))
nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
" adding an instance in start mode")
if self.op.ip_check:
- if utils.TcpPing(utils.HostInfo().name, hostname1.ip,
- constants.DEFAULT_NODED_PORT):
+ if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("IP %s of instance %s already in use" %
(hostname1.ip, instance_name))
# boot order verification
if self.op.hvm_boot_order is not None:
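+ # strip() removes every valid device letter ([acdn]); anything left
+ # over means an invalid character was given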
if len(self.op.hvm_boot_order.strip("acdn")) != 0:
- raise errors.OpPrereqError("invalid boot order specified,"
- " must be one or more of [acdn]")
+ raise errors.OpPrereqError("invalid boot order specified,"
+ " must be one or more of [acdn]")
if self.op.start:
self.instance_status = 'up'
# ok, we created the new LVs, so now we know we have the needed
# storage; as such, we proceed on the target node to rename
# old_lv to _old, and new_lv to old_lv; note that we rename LVs
- # using the assumption than logical_id == physical_id (which in
+ # using the assumption that logical_id == physical_id (which in
# turn is the unique_id on that node)
# FIXME(iustin): use a better name for the replaced LVs
instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name))
if instance is None:
raise errors.OpPrereqError("No such instance name '%s'" % name)
- self.wanted_instances.append(instance)
+ self.wanted_instances.append(instance)
else:
self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
in self.cfg.GetInstanceList()]
args['memory'] = self.mem
if self.vcpus:
args['vcpus'] = self.vcpus
- if self.do_ip or self.do_bridge:
+ if self.do_ip or self.do_bridge or self.mac:
if self.do_ip:
ip = self.ip
else:
bridge = self.bridge
else:
bridge = self.instance.nics[0].bridge
- args['nics'] = [(ip, bridge)]
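+ # hook nic tuples are (ip, bridge, mac); fall back to the instance's
+ # current MAC when none was given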
+ if self.mac:
+ mac = self.mac
+ else:
+ mac = self.instance.nics[0].mac
+ args['nics'] = [(ip, bridge, mac)]
env = _BuildInstanceHookEnvByObject(self.instance, override=args)
nl = [self.sstore.GetMasterNode(),
self.instance.primary_node] + list(self.instance.secondary_nodes)
raise errors.OpRetryError("There has been a modification to the"
" config file and the operation has been"
" aborted. Please retry.")
+
+class LUTestDelay(NoHooksLU):
+ """Sleep for a specified amount of time.
+
+ This LU sleeps on the master and/or nodes for a specified amount of
+ time.
+
+ """
+ _OP_REQP = ["duration", "on_master", "on_nodes"]
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that we have a good list of nodes and/or the duration
+ is valid.
+
+ """
+
+ if self.op.on_nodes:
+ self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
+
+ def Exec(self, feedback_fn):
+ """Do the actual sleep.
+
+ """
+ if self.op.on_master:
+ if not utils.TestDelay(self.op.duration):
+ raise errors.OpExecError("Error during master delay test")
+ if self.op.on_nodes:
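+ # run the delay on the requested nodes; the rpc call returns a dict
+ # mapping each node name to its boolean result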
+ result = rpc.call_test_delay(self.op.on_nodes, self.op.duration)
+ if not result:
+ raise errors.OpExecError("Complete failure from rpc call")
+ for node, node_result in result.items():
+ if not node_result:
+ raise errors.OpExecError("Failure during rpc call to node %s,"
+ " result: %s" % (node, node_result))