X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/71c42cd84159a74f85e702b653298037009ed106..fc490dbe206e4c89b75d0f3e96ff4418242bac34:/lib/cmdlib.py

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 6318d45..2e26cfb 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -510,13 +510,13 @@ class LUInitCluster(LogicalUnit):
 
     if hostname.ip.startswith("127."):
       raise errors.OpPrereqError("This host's IP resolves to the private"
-                                 " range (%s). Please fix DNS or /etc/hosts." %
-                                 (hostname.ip,))
+                                 " range (%s). Please fix DNS or %s." %
+                                 (hostname.ip, constants.ETC_HOSTS))
 
     self.clustername = clustername = utils.HostInfo(self.op.cluster_name)
 
-    if not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, hostname.ip,
-                         constants.DEFAULT_NODED_PORT):
+    if not utils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT,
+                         source=constants.LOCALHOST_IP_ADDRESS):
       raise errors.OpPrereqError("Inconsistency: this host's name resolves"
                                  " to %s,\nbut this ip address does not"
                                  " belong to this host."
@@ -527,8 +527,8 @@ class LUInitCluster(LogicalUnit):
       raise errors.OpPrereqError("Invalid secondary ip given")
     if (secondary_ip and
         secondary_ip != hostname.ip and
-        (not utils.TcpPing(constants.LOCALHOST_IP_ADDRESS, secondary_ip,
-                           constants.DEFAULT_NODED_PORT))):
+        (not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
+                           source=constants.LOCALHOST_IP_ADDRESS))):
       raise errors.OpPrereqError("You gave %s as secondary IP,"
                                  " but it does not belong to this host." %
                                  secondary_ip)
@@ -632,6 +632,8 @@ class LUDestroyCluster(NoHooksLU):
 
     """
     master = self.sstore.GetMasterNode()
+    if not rpc.call_node_stop_master(master):
+      raise errors.OpExecError("Could not disable the master role")
     priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
     utils.CreateBackup(priv_key)
     utils.CreateBackup(pub_key)
@@ -947,7 +949,7 @@ class LUVerifyDisks(NoHooksLU):
         inst = nv_dict.pop((node, lv_name), None)
         if (not lv_online and inst is not None
             and inst.name not in res_instances):
-          res_instances.append(inst.name)
+          res_instances.append(inst.name)
 
     # any leftover items in nv_dict are missing LVs, let's arrange the
     # data better
@@ -972,7 +974,7 @@ class LURenameCluster(LogicalUnit):
 
     """
     env = {
-      "OP_TARGET": self.op.sstore.GetClusterName(),
+      "OP_TARGET": self.sstore.GetClusterName(),
       "NEW_NAME": self.op.name,
       }
     mn = self.sstore.GetMasterNode()
@@ -1475,16 +1477,13 @@ class LUAddNode(LogicalUnit):
                                  " new node doesn't have one")
 
     # checks reachablity
-    if not utils.TcpPing(utils.HostInfo().name,
-                         primary_ip,
-                         constants.DEFAULT_NODED_PORT):
+    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
       raise errors.OpPrereqError("Node not reachable by ping")
 
     if not newbie_singlehomed:
       # check reachability from my secondary ip to newbie's secondary ip
-      if not utils.TcpPing(myself.secondary_ip,
-                           secondary_ip,
-                           constants.DEFAULT_NODED_PORT):
+      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
+                           source=myself.secondary_ip):
         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                    " based ping to noded port")
 
@@ -1608,7 +1607,7 @@ class LUAddNode(LogicalUnit):
       dist_nodes.remove(myself.name)
 
     logger.Debug("Copying hosts and known_hosts to all nodes")
-    for fname in ("/etc/hosts", constants.SSH_KNOWN_HOSTS_FILE):
+    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
       result = rpc.call_upload_file(dist_nodes, fname)
       for to_node in dist_nodes:
         if not result[to_node]:
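
Note: the utils.TcpPing hunks above (and the matching ones later in this diff) all follow the same migration: the target address and port now come first, and the source address becomes an optional keyword argument, so callers that do not care about the source no longer have to supply one. A minimal sketch of the assumed new signature, inferred from the call sites rather than stated anywhere in this diff (the timeout parameter and all defaults are guesses, not Ganeti's actual utils.py):

  import socket

  def TcpPing(target, port, timeout=10, source=None):
    # Illustrative sketch only: connect to target:port, optionally
    # binding to the given source address first, and report success.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
      try:
        if source is not None:
          sock.bind((source, 0))  # pick the outgoing address explicitly
        sock.settimeout(timeout)
        sock.connect((target, port))
        success = True
      except socket.error:
        success = False
    finally:
      sock.close()
    return success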
@@ -1860,23 +1859,41 @@ def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
 
   """
   device_info = []
   disks_ok = True
+  iname = instance.name
+  # With the two passes mechanism we try to reduce the window of
+  # opportunity for the race condition of switching DRBD to primary
+  # before handshaking occurred, but we do not eliminate it
+
+  # The proper fix would be to wait (with some limits) until the
+  # connection has been made and drbd transitions from WFConnection
+  # into any other network-connected state (Connected, SyncTarget,
+  # SyncSource, etc.)
+
+  # 1st pass, assemble on all nodes in secondary mode
   for inst_disk in instance.disks:
-    master_result = None
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
       cfg.SetDiskID(node_disk, node)
-      is_primary = node == instance.primary_node
-      result = rpc.call_blockdev_assemble(node, node_disk,
-                                          instance.name, is_primary)
+      result = rpc.call_blockdev_assemble(node, node_disk, iname, False)
       if not result:
         logger.Error("could not prepare block device %s on node %s"
-                     " (is_primary=%s)" %
-                     (inst_disk.iv_name, node, is_primary))
-        if is_primary or not ignore_secondaries:
+                     " (is_primary=False, pass=1)" % (inst_disk.iv_name, node))
+        if not ignore_secondaries:
           disks_ok = False
-    if is_primary:
-      master_result = result
-    device_info.append((instance.primary_node, inst_disk.iv_name,
-                        master_result))
+
+  # FIXME: race condition on drbd migration to primary
+
+  # 2nd pass, do only the primary node
+  for inst_disk in instance.disks:
+    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+      if node != instance.primary_node:
+        continue
+      cfg.SetDiskID(node_disk, node)
+      result = rpc.call_blockdev_assemble(node, node_disk, iname, True)
+      if not result:
+        logger.Error("could not prepare block device %s on node %s"
+                     " (is_primary=True, pass=2)" % (inst_disk.iv_name, node))
+        disks_ok = False
+      device_info.append((instance.primary_node, inst_disk.iv_name, result))
 
   # leave the disks configured for the primary node
   # this is a workaround that would be fixed better by
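
Note: the two-pass assembly above narrows, but does not close, the window in which the primary-side DRBD device is promoted before the handshake with its peer completes. The added comment already names the proper fix: wait, with a bound, until the device leaves WFConnection. A hypothetical sketch of such a wait loop (the helper and its get_cstate callable are invented for illustration; in Ganeti the state would have to be fetched from the node via an RPC, and a real implementation would check for the connected states explicitly rather than just "not WFConnection"):

  import time

  def _WaitForDrbdConnect(get_cstate, timeout=15.0, step=0.5):
    # Poll a DRBD connection-state getter until it leaves WFConnection,
    # i.e. until the handshake finished (Connected, SyncSource,
    # SyncTarget, ...). Returns False if it is still waiting at timeout.
    deadline = time.time() + timeout
    while time.time() < deadline:
      if get_cstate() != "WFConnection":
        return True
      time.sleep(step)
    return False  # caller may proceed anyway and accept the race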
""" - self.dynamic_fields = frozenset(["oper_state", "oper_ram"]) + self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"]) _CheckOutputFields(static=["name", "os", "pnode", "snodes", "admin_state", "admin_ram", "disk_template", "ip", "mac", "bridge", @@ -2457,6 +2479,21 @@ class LUQueryInstances(NoHooksLU): val = None else: val = bool(live_data.get(instance.name)) + elif field == "status": + if instance.primary_node in bad_nodes: + val = "ERROR_nodedown" + else: + running = bool(live_data.get(instance.name)) + if running: + if instance.status != "down": + val = "running" + else: + val = "ERROR_up" + else: + if instance.status != "down": + val = "ERROR_down" + else: + val = "ADMIN_down" elif field == "admin_ram": val = instance.memory elif field == "oper_ram": @@ -2708,9 +2745,9 @@ def _GenerateDiskTemplate(cfg, template_name, #TODO: compute space requirements vgname = cfg.GetVGName() - if template_name == "diskless": + if template_name == constants.DT_DISKLESS: disks = [] - elif template_name == "plain": + elif template_name == constants.DT_PLAIN: if len(secondary_nodes) != 0: raise errors.ProgrammerError("Wrong template configuration") @@ -2722,7 +2759,7 @@ def _GenerateDiskTemplate(cfg, template_name, logical_id=(vgname, names[1]), iv_name = "sdb") disks = [sda_dev, sdb_dev] - elif template_name == "local_raid1": + elif template_name == constants.DT_LOCAL_RAID1: if len(secondary_nodes) != 0: raise errors.ProgrammerError("Wrong template configuration") @@ -2878,7 +2915,7 @@ class LUCreateInstance(LogicalUnit): os_type=self.op.os_type, memory=self.op.mem_size, vcpus=self.op.vcpus, - nics=[(self.inst_ip, self.op.bridge)], + nics=[(self.inst_ip, self.op.bridge, self.op.mac)], )) nl = ([self.sstore.GetMasterNode(), self.op.pnode] + @@ -3034,8 +3071,7 @@ class LUCreateInstance(LogicalUnit): " adding an instance in start mode") if self.op.ip_check: - if utils.TcpPing(utils.HostInfo().name, hostname1.ip, - constants.DEFAULT_NODED_PORT): + if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("IP %s of instance %s already in use" % (hostname1.ip, instance_name)) @@ -3060,8 +3096,8 @@ class LUCreateInstance(LogicalUnit): # boot order verification if self.op.hvm_boot_order is not None: if len(self.op.hvm_boot_order.strip("acdn")) != 0: - raise errors.OpPrereqError("invalid boot order specified," - " must be one or more of [acdn]") + raise errors.OpPrereqError("invalid boot order specified," + " must be one or more of [acdn]") if self.op.start: self.instance_status = 'up' @@ -3723,7 +3759,7 @@ class LUReplaceDisks(LogicalUnit): # ok, we created the new LVs, so now we know we have the needed # storage; as such, we proceed on the target node to rename # old_lv to _old, and new_lv to old_lv; note that we rename LVs - # using the assumption than logical_id == physical_id (which in + # using the assumption that logical_id == physical_id (which in # turn is the unique_id on that node) # FIXME(iustin): use a better name for the replaced LVs @@ -3995,7 +4031,7 @@ class LUQueryInstanceData(NoHooksLU): instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name)) if instance is None: raise errors.OpPrereqError("No such instance name '%s'" % name) - self.wanted_instances.append(instance) + self.wanted_instances.append(instance) else: self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name in self.cfg.GetInstanceList()] @@ -4098,7 +4134,7 @@ class LUSetInstanceParms(LogicalUnit): args['memory'] = self.mem if self.vcpus: args['vcpus'] = 
@@ -2708,9 +2745,9 @@ def _GenerateDiskTemplate(cfg, template_name,
   #TODO: compute space requirements
 
   vgname = cfg.GetVGName()
-  if template_name == "diskless":
+  if template_name == constants.DT_DISKLESS:
     disks = []
-  elif template_name == "plain":
+  elif template_name == constants.DT_PLAIN:
     if len(secondary_nodes) != 0:
       raise errors.ProgrammerError("Wrong template configuration")
 
@@ -2722,7 +2759,7 @@ def _GenerateDiskTemplate(cfg, template_name,
                            logical_id=(vgname, names[1]),
                            iv_name = "sdb")
     disks = [sda_dev, sdb_dev]
-  elif template_name == "local_raid1":
+  elif template_name == constants.DT_LOCAL_RAID1:
     if len(secondary_nodes) != 0:
       raise errors.ProgrammerError("Wrong template configuration")
 
@@ -2878,7 +2915,7 @@ class LUCreateInstance(LogicalUnit):
       os_type=self.op.os_type,
       memory=self.op.mem_size,
       vcpus=self.op.vcpus,
-      nics=[(self.inst_ip, self.op.bridge)],
+      nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
     ))
 
     nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
@@ -3034,8 +3071,7 @@ class LUCreateInstance(LogicalUnit):
                                  " adding an instance in start mode")
 
     if self.op.ip_check:
-      if utils.TcpPing(utils.HostInfo().name, hostname1.ip,
-                       constants.DEFAULT_NODED_PORT):
+      if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT):
         raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                    (hostname1.ip, instance_name))
 
@@ -3060,8 +3096,8 @@ class LUCreateInstance(LogicalUnit):
     # boot order verification
     if self.op.hvm_boot_order is not None:
       if len(self.op.hvm_boot_order.strip("acdn")) != 0:
-             raise errors.OpPrereqError("invalid boot order specified,"
-                                        " must be one or more of [acdn]")
+        raise errors.OpPrereqError("invalid boot order specified,"
+                                   " must be one or more of [acdn]")
 
     if self.op.start:
       self.instance_status = 'up'
@@ -3723,7 +3759,7 @@ class LUReplaceDisks(LogicalUnit):
     # ok, we created the new LVs, so now we know we have the needed
     # storage; as such, we proceed on the target node to rename
     # old_lv to _old, and new_lv to old_lv; note that we rename LVs
-    # using the assumption than logical_id == physical_id (which in
+    # using the assumption that logical_id == physical_id (which in
     # turn is the unique_id on that node)
 
     # FIXME(iustin): use a better name for the replaced LVs
@@ -3995,7 +4031,7 @@ class LUQueryInstanceData(NoHooksLU):
         instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name))
         if instance is None:
           raise errors.OpPrereqError("No such instance name '%s'" % name)
-        self.wanted_instances.append(instance)
+        self.wanted_instances.append(instance)
     else:
       self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                                in self.cfg.GetInstanceList()]
@@ -4098,7 +4134,7 @@ class LUSetInstanceParms(LogicalUnit):
       args['memory'] = self.mem
     if self.vcpus:
       args['vcpus'] = self.vcpus
-    if self.do_ip or self.do_bridge:
+    if self.do_ip or self.do_bridge or self.mac:
       if self.do_ip:
         ip = self.ip
       else:
@@ -4107,7 +4143,11 @@ class LUSetInstanceParms(LogicalUnit):
       bridge = self.bridge
     else:
       bridge = self.instance.nics[0].bridge
-    args['nics'] = [(ip, bridge)]
+    if self.mac:
+      mac = self.mac
+    else:
+      mac = self.instance.nics[0].mac
+    args['nics'] = [(ip, bridge, mac)]
     env = _BuildInstanceHookEnvByObject(self.instance, override=args)
     nl = [self.sstore.GetMasterNode(),
           self.instance.primary_node] + list(self.instance.secondary_nodes)
@@ -4524,3 +4564,39 @@ class LUDelTags(TagsLU):
         raise errors.OpRetryError("There has been a modification to the"
                                   " config file and the operation has been"
                                   " aborted. Please retry.")
+
+class LUTestDelay(NoHooksLU):
+  """Sleep for a specified amount of time.
+
+  This LU sleeps on the master and/or nodes for a specified amount of
+  time.
+
+  """
+  _OP_REQP = ["duration", "on_master", "on_nodes"]
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    This checks that we have a good list of nodes and/or the duration
+    is valid.
+
+    """
+
+    if self.op.on_nodes:
+      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
+
+  def Exec(self, feedback_fn):
+    """Do the actual sleep.
+
+    """
+    if self.op.on_master:
+      if not utils.TestDelay(self.op.duration):
+        raise errors.OpExecError("Error during master delay test")
+    if self.op.on_nodes:
+      result = rpc.call_test_delay(self.op.on_nodes, self.op.duration)
+      if not result:
+        raise errors.OpExecError("Complete failure from rpc call")
+      for node, node_result in result.items():
+        if not node_result:
+          raise errors.OpExecError("Failure during rpc call to node %s,"
+                                   " result: %s" % (node, node_result))
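
Note: LUTestDelay depends on two helpers this diff does not show: utils.TestDelay on the master and the test_delay RPC fanned out to the nodes. Modelling the delay as a regular logical unit means it exercises the whole opcode path (prerequisite checks, per-node RPC, result collection), which is presumably the point of a test opcode. Assuming the master-side helper simply sleeps and reports success (a guess; the real implementation may validate more), it could look like:

  import time

  def TestDelay(duration):
    # Sketch of the assumed master-side helper; not taken from this diff.
    if duration < 0:
      return False  # refuse to "sleep" for a negative amount of time
    time.sleep(duration)
    return True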