doc/design-monitoring-agent.rst \
doc/design-virtual-clusters.rst \
doc/design-x509-ca.rst \
+ doc/design-hroller.rst \
doc/devnotes.rst \
doc/glossary.rst \
doc/hooks.rst \
# All Haskell non-test programs to be compiled but not automatically installed
HS_PROGS = $(HS_BIN_PROGS) $(HS_MYEXECLIB_PROGS)
-HS_BIN_ROLES = hbal hscan hspace hinfo hcheck hroller
+HS_BIN_ROLES = harep hbal hscan hspace hinfo hcheck hroller
HS_HTOOLS_PROGS = $(HS_BIN_ROLES) hail
HS_ALL_PROGS = \
src/Ganeti/HTools/Node.hs \
src/Ganeti/HTools/PeerMap.hs \
src/Ganeti/HTools/Program/Hail.hs \
+ src/Ganeti/HTools/Program/Harep.hs \
src/Ganeti/HTools/Program/Hbal.hs \
src/Ganeti/HTools/Program/Hcheck.hs \
src/Ganeti/HTools/Program/Hinfo.hs \
src/Ganeti/Query/Group.hs \
src/Ganeti/Query/Job.hs \
src/Ganeti/Query/Language.hs \
+ src/Ganeti/Query/Network.hs \
src/Ganeti/Query/Node.hs \
src/Ganeti/Query/Query.hs \
src/Ganeti/Query/Server.hs \
test/data/proc_drbd83_sync.txt \
test/data/proc_drbd83_sync_want.txt \
test/data/proc_drbd83_sync_krnl2.6.39.txt \
+ test/data/qa-minimal-nodes-instances-only.json \
test/data/sys_drbd_usermode_helper.txt \
test/data/vgreduce-removemissing-2.02.02.txt \
test/data/vgreduce-removemissing-2.02.66-fail.txt \
test/data/vgreduce-removemissing-2.02.66-ok.txt \
test/data/vgs-missing-pvs-2.02.02.txt \
test/data/vgs-missing-pvs-2.02.66.txt \
+ test/data/xen-xm-info-4.0.1.txt \
+ test/data/xen-xm-list-4.0.1-dom0-only.txt \
+ test/data/xen-xm-list-4.0.1-four-instances.txt \
test/py/ganeti-cli.test \
test/py/gnt-cli.test \
test/py/import-export_unittest-helper
design-autorepair.rst
design-partitioned.rst
design-monitoring-agent.rst
+ design-hroller.rst
.. vim: set textwidth=72 :
.. Local Variables:
--- /dev/null
+============
+HRoller tool
+============
+
+.. contents:: :depth: 4
+
+This is a design document detailing the cluster maintenance scheduler,
+HRoller.
+
+
+Current state and shortcomings
+==============================
+
+To enable automating cluster-wide reboots a new htool, called HRoller,
+was added to Ganeti starting from version 2.7. This tool helps
+parallelizing cluster offline maintenances by calculating which nodes
+are not both primary and secondary for a DRBD instance, and thus can be
+rebooted at the same time, when all instances are down.
+
+The way this is done is documented in the :manpage:`hroller(1)` manpage.
+
+We would now like to perform online maintenance on the cluster by
+rebooting nodes after evacuating their primary instances (rolling
+reboots).
+
+Proposed changes
+================
+
+
+Calculating rolling maintenances
+--------------------------------
+
+In order to perform rolling maintenance we need to migrate instances off
+the nodes before a reboot. How this can be done depends on the
+instance's disk template and status:
+
+Down instances
+++++++++++++++
+
+If an instance was shut down when the maintenance started it will be
+ignored. This allows avoiding needlessly moving its primary around,
+since it won't suffer a downtime anyway.
+
+
+DRBD
+++++
+
+Each node must migrate all instances off to their secondaries, and then
+can either be rebooted, or the secondaries can be evacuated as well.
+
+Since currently doing a ``replace-disks`` on DRBD breaks redundancy,
+it's not any safer than temporarily rebooting a node with secondaries on
+it (citation needed). As such we'll implement for now just the
+"migrate+reboot" mode, and focus later on replace-disks as well.
+
+In order to do that we can use the following algorithm:
+
+1) Compute node sets that don't contain both the primary and the
+secondary for any instance. This can be done already by the current
+hroller graph coloring algorithm: nodes are in the same set (color) if
+and only if no edge (instance) exists between them (see the
+:manpage:`hroller(1)` manpage for more details).
+2) Inside each node set calculate subsets that don't have any secondary
+node in common (this can be done by creating a graph of nodes that are
+connected if and only if an instance on both has the same secondary
+node, and coloring that graph)
+3) It is then possible to migrate in parallel all nodes in a subset
+created at step 2, and then reboot/perform maintenance on them, and
+migrate back their original primaries, which allows the computation
+above to be reused for each following subset without N+1 failures being
+triggered, if none were present before. See below about the actual
+execution of the maintenance.
+
+Non-DRBD
+++++++++
+
+All non-DRBD disk templates that can be migrated have no "secondary"
+concept. As such instances can be migrated to any node (in the same
+nodegroup). In order to do the job we can either:
+
+- Perform migrations on one node at a time, perform the maintenance on
+  that node, and proceed (the node will then be targeted again to host
+  instances automatically, as hail chooses targets for the instances
+  between all nodes in a group). Nodes in different nodegroups can be
+  handled in parallel.
+- Perform migrations on one node at a time, but without waiting for the
+ first node to come back before proceeding. This allows us to continue,
+ restricting the cluster, until no more capacity in the nodegroup is
+ available, and then having to wait for some nodes to come back so that
+ capacity is available again for the last few nodes.
+- Pre-Calculate sets of nodes that can be migrated together (probably
+ with a greedy algorithm) and parallelize between them, with the
+ migrate-back approach discussed for DRBD to perform the calculation
+ only once.
+
+Note that for non-DRBD disks that still use local storage (eg. RBD and
+plain) redundancy might break anyway, and nothing except the first
+algorithm might be safe. This perhaps would be a good reason to consider
+managing better RBD pools, if those are implemented on top of nodes
+storage, rather than on dedicated storage machines.
+
+Executing rolling maintenances
+------------------------------
+
+Hroller accepts commands to run to do maintenance automatically. These
+are going to be run on the machine hroller runs on, and take a node name
+as input. They have then to gain access to the target node (via ssh,
+restricted commands, or some other means) and perform their duty.
+
+1) A command (--check-cmd) will be called on all selected online nodes
+to check whether a node needs maintenance. Hroller will proceed only on
+nodes that respond positively to this invocation.
+FIXME: decide about -D
+2) Hroller will evacuate the node of all primary instances.
+3) A command (--maint-cmd) will be called on a node to do the actual
+maintenance operation. It should do any operation needed to perform the
+maintenance including triggering the actual reboot.
+4) A command (--verify-cmd) will be called to check that the operation
+was successful, it has to wait until the target node is back up (and
+decide after how long it should give up) and perform the verification.
+If it's not successful hroller will stop and not proceed with other
+nodes.
+5) The master node will be kept last, but will not otherwise be treated
+specially. If hroller was running on the master node, care must be
+exercised as its maintenance will have interrupted the software itself,
+and as such the verification step will not happen. This will not
+automatically be taken care of, in the first version. An additional flag
+to just skip the master node will be present as well, in case that's
+preferred.
+
+
+Future work
+===========
+
+DRBD nodes' ``replace-disks``' functionality should be implemented. Note
+that when we will support a DRBD version that allows multi-secondary
+this can be done safely, without losing replication at any time, by
+adding a temporary secondary and only when the sync is finished dropping
+the previous one.
+
+If/when RBD pools can be managed inside Ganeti, care can be taken so
+that the pool is evacuated as well from a node before it's put into
+maintenance. This is equivalent to evacuating DRBD secondaries.
+
+Master failovers during the maintenance should be performed by hroller.
+This requires RPC/RAPI support for master failover. Hroller should also
+be modified to better support running on the master itself and
+continuing on the new master.
+
+.. vim: set textwidth=72 :
+.. Local Variables:
+.. mode: rst
+.. fill-column: 72
+.. End:
"tags": (",".join, False),
}
+ cl = GetClient()
return GenericList(constants.QR_NETWORK, desired_fields, args, None,
opts.separator, not opts.no_headers,
- verbose=opts.verbose, format_override=fmtoverride)
+ verbose=opts.verbose, format_override=fmtoverride,
+ cl=cl)
def ListNetworkFields(opts, args):
#
#
-# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
fmtoverride = dict.fromkeys(["pinst_list", "sinst_list", "tags"],
(",".join, False))
- cl = GetClient(query=False)
+ cl = GetClient(query=True)
return GenericList(constants.QR_NODE, selected_fields, args, opts.units,
opts.separator, not opts.no_headers,
@return: the desired exit code
"""
- # note: if this starts using RPC fields, and we haven't yet fixed
- # hconfd, then we should revert to query=False
cl = GetClient(query=True)
result = cl.QueryNodes(fields=["name", "pip", "sip",
"pinst_list", "sinst_list",
XEN_CMD_XL = "xl"
# FIXME: This will be made configurable using hvparams in Ganeti 2.7
XEN_CMD = _autoconf.XEN_CMD
+
+KNOWN_XEN_COMMANDS = compat.UniqueFrozenset([
+ XEN_CMD_XM,
+ XEN_CMD_XL,
+ ])
+
# When the Xen toolstack used is "xl", live migration requires the source host
# to connect to the target host via ssh (xl runs this command). We need to pass
# the command xl runs some extra info so that it can use Ganeti's key
])
-def _ProbeTapVnetHdr(fd):
+def _GetTunFeatures(fd, _ioctl=fcntl.ioctl):
+ """Retrieves supported TUN features from file descriptor.
+
+ @see: L{_ProbeTapVnetHdr}
+
+ """
+ req = struct.pack("I", 0)
+ try:
+ buf = _ioctl(fd, TUNGETFEATURES, req)
+ except EnvironmentError, err:
+ logging.warning("ioctl(TUNGETFEATURES) failed: %s", err)
+ return None
+ else:
+ (flags, ) = struct.unpack("I", buf)
+ return flags
+
+
+def _ProbeTapVnetHdr(fd, _features_fn=_GetTunFeatures):
"""Check whether to enable the IFF_VNET_HDR flag.
To do this, _all_ of the following conditions must be met:
@param fd: the file descriptor of /dev/net/tun
"""
- req = struct.pack("I", 0)
- try:
- res = fcntl.ioctl(fd, TUNGETFEATURES, req)
- except EnvironmentError:
- logging.warning("TUNGETFEATURES ioctl() not implemented")
- return False
+ flags = _features_fn(fd)
- tunflags = struct.unpack("I", res)[0]
- if tunflags & IFF_VNET_HDR:
- return True
- else:
- logging.warning("Host does not support IFF_VNET_HDR, not enabling")
+ if flags is None:
+ # Not supported
return False
+ result = bool(flags & IFF_VNET_HDR)
+
+ if not result:
+ logging.warning("Kernel does not support IFF_VNET_HDR, not enabling")
+
+ return result
+
def _OpenTap(vnet_hdr=True):
"""Open a new tap device and return its file descriptor.
greeting = self._Recv()
if not greeting[self._FIRST_MESSAGE_KEY]:
self._connected = False
- raise errors.HypervisorError("kvm: qmp communication error (wrong"
+ raise errors.HypervisorError("kvm: QMP communication error (wrong"
" server greeting")
# Let's put the monitor in command mode using the qmp_capabilities
constants.HV_VNC_BIND_ADDRESS:
(False, lambda x: (netutils.IP4Address.IsValid(x) or
utils.IsNormAbsPath(x)),
- "the VNC bind address must be either a valid IP address or an absolute"
+ "The VNC bind address must be either a valid IP address or an absolute"
" pathname", None, None),
constants.HV_VNC_TLS: hv_base.NO_CHECK,
constants.HV_VNC_X509: hv_base.OPT_DIR_CHECK,
constants.HV_KVM_SPICE_IP_VERSION:
(False, lambda x: (x == constants.IFACE_NO_IP_VERSION_SPECIFIED or
x in constants.VALID_IP_VERSIONS),
- "the SPICE IP version should be 4 or 6",
+ "The SPICE IP version should be 4 or 6",
None, None),
constants.HV_KVM_SPICE_PASSWORD_FILE: hv_base.OPT_FILE_CHECK,
constants.HV_KVM_SPICE_LOSSLESS_IMG_COMPR:
result = utils.RunCmd([pathutils.KVM_IFUP, tap], env=env)
if result.failed:
- raise errors.HypervisorError("Failed to configure interface %s: %s."
- " Network configuration script output: %s" %
+ raise errors.HypervisorError("Failed to configure interface %s: %s;"
+ " network configuration script output: %s" %
(tap, result.fail_reason, result.output))
@staticmethod
else:
vnc_arg = "%s:%d" % (vnc_bind_address, display)
else:
- logging.error("Network port is not a valid VNC display (%d < %d)."
- " Not starting VNC", instance.network_port,
- constants.VNC_BASE_PORT)
+ logging.error("Network port is not a valid VNC display (%d < %d),"
+ " not starting VNC",
+ instance.network_port, constants.VNC_BASE_PORT)
vnc_arg = "none"
# Only allow tls and other option when not binding to a file, for now.
# have that kind of IP addresses, throw an exception
if spice_ip_version != constants.IFACE_NO_IP_VERSION_SPECIFIED:
if not addresses[spice_ip_version]:
- raise errors.HypervisorError("spice: unable to get an IPv%s address"
+ raise errors.HypervisorError("SPICE: Unable to get an IPv%s address"
" for %s" % (spice_ip_version,
spice_bind))
elif addresses[constants.IP6_VERSION]:
spice_ip_version = constants.IP6_VERSION
else:
- raise errors.HypervisorError("spice: unable to get an IP address"
+ raise errors.HypervisorError("SPICE: Unable to get an IP address"
" for %s" % (spice_bind))
spice_address = addresses[spice_ip_version][0]
# Enable the spice agent communication channel between the host and the
# agent.
kvm_cmd.extend(["-device", "virtio-serial-pci"])
- kvm_cmd.extend(["-device", "virtserialport,chardev=spicechannel0,"
- "name=com.redhat.spice.0"])
+ kvm_cmd.extend([
+ "-device",
+ "virtserialport,chardev=spicechannel0,name=com.redhat.spice.0",
+ ])
kvm_cmd.extend(["-chardev", "spicevmc,id=spicechannel0,name=vdagent"])
logging.info("KVM: SPICE will listen on port %s", instance.network_port)
"""Invoke a command on the instance monitor.
"""
+ # TODO: Replace monitor calls with QMP once KVM >= 0.14 is the minimum
+ # version. The monitor protocol is designed for human consumption, whereas
+ # QMP is made for programmatic usage. In the worst case QMP can also
+ # execute monitor commands. As it is, all calls to socat take at least
+ # 500ms and likely more: socat can't detect the end of the reply and waits
+ # for 500ms of no data received before exiting (500 ms is the default for
+ # the "-t" parameter).
socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" %
(utils.ShellQuote(command),
constants.SOCAT_PATH,
utils.ShellQuote(self._InstanceMonitor(instance_name))))
result = utils.RunCmd(socat)
if result.failed:
- msg = ("Failed to send command '%s' to instance %s."
- " output: %s, error: %s, fail_reason: %s" %
- (command, instance_name,
- result.stdout, result.stderr, result.fail_reason))
+ msg = ("Failed to send command '%s' to instance '%s', reason '%s',"
+ " output: %s" %
+ (command, instance_name, result.fail_reason, result.output))
raise errors.HypervisorError(msg)
return result
# IP of that family
if (netutils.IP4Address.IsValid(spice_bind) and
spice_ip_version != constants.IP4_VERSION):
- raise errors.HypervisorError("spice: got an IPv4 address (%s), but"
+ raise errors.HypervisorError("SPICE: Got an IPv4 address (%s), but"
" the specified IP version is %s" %
(spice_bind, spice_ip_version))
if (netutils.IP6Address.IsValid(spice_bind) and
spice_ip_version != constants.IP6_VERSION):
- raise errors.HypervisorError("spice: got an IPv6 address (%s), but"
+ raise errors.HypervisorError("SPICE: Got an IPv6 address (%s), but"
" the specified IP version is %s" %
(spice_bind, spice_ip_version))
else:
# error if any of them is set without it.
for param in _SPICE_ADDITIONAL_PARAMS:
if hvparams[param]:
- raise errors.HypervisorError("spice: %s requires %s to be set" %
+ raise errors.HypervisorError("SPICE: %s requires %s to be set" %
(param, constants.HV_KVM_SPICE_BIND))
@classmethod
if spice_bind:
# only one of VNC and SPICE can be used currently.
if hvparams[constants.HV_VNC_BIND_ADDRESS]:
- raise errors.HypervisorError("both SPICE and VNC are configured, but"
+ raise errors.HypervisorError("Both SPICE and VNC are configured, but"
" only one of them can be used at a"
- " given time.")
+ " given time")
# check that KVM supports SPICE
kvmhelp = cls._GetKVMOutput(kvm_path, cls._KVMOPT_HELP)
if not cls._SPICE_RE.search(kvmhelp):
- raise errors.HypervisorError("spice is configured, but it is not"
- " supported according to kvm --help")
+ raise errors.HypervisorError("SPICE is configured, but it is not"
+ " supported according to 'kvm --help'")
# if spice_bind is not an IP address, it must be a valid interface
- bound_to_addr = (netutils.IP4Address.IsValid(spice_bind)
- or netutils.IP6Address.IsValid(spice_bind))
+ bound_to_addr = (netutils.IP4Address.IsValid(spice_bind) or
+ netutils.IP6Address.IsValid(spice_bind))
if not bound_to_addr and not netutils.IsValidInterface(spice_bind):
- raise errors.HypervisorError("spice: the %s parameter must be either"
+ raise errors.HypervisorError("SPICE: The %s parameter must be either"
" a valid IP address or interface name" %
constants.HV_KVM_SPICE_BIND)
"""
import logging
+import string # pylint: disable=W0402
from cStringIO import StringIO
from ganeti import constants
VIF_BRIDGE_SCRIPT = utils.PathJoin(pathutils.XEN_CONFIG_DIR,
"scripts/vif-bridge")
_DOM0_NAME = "Domain-0"
+_DISK_LETTERS = string.ascii_lowercase
+
+_FILE_DRIVER_MAP = {
+ constants.FD_LOOP: "file",
+ constants.FD_BLKTAP: "tap:aio",
+ }
def _CreateConfigCpus(cpu_mask):
return "cpus = [ %s ]" % ", ".join(map(_GetCPUMap, cpu_list))
+def _RunXmList(fn, xmllist_errors):
+ """Helper function for L{_GetXmList} to run "xm list".
+
+ @type fn: callable
+ @param fn: Function returning result of running C{xm list}
+ @type xmllist_errors: list
+ @param xmllist_errors: Error list
+ @rtype: list
+
+ """
+ result = fn()
+ if result.failed:
+ logging.error("xm list failed (%s): %s", result.fail_reason,
+ result.output)
+ xmllist_errors.append(result)
+ raise utils.RetryAgain()
+
+  # Note: the heading line is still included here; _ParseXmList skips it
+ return result.stdout.splitlines()
+
+
+def _ParseXmList(lines, include_node):
+ """Parses the output of C{xm list}.
+
+ @type lines: list
+ @param lines: Output lines of C{xm list}
+ @type include_node: boolean
+ @param include_node: If True, return information for Dom0
+ @return: list of tuple containing (name, id, memory, vcpus, state, time
+ spent)
+
+ """
+ result = []
+
+ # Iterate through all lines while ignoring header
+ for line in lines[1:]:
+ # The format of lines is:
+ # Name ID Mem(MiB) VCPUs State Time(s)
+ # Domain-0 0 3418 4 r----- 266.2
+ data = line.split()
+ if len(data) != 6:
+ raise errors.HypervisorError("Can't parse output of xm list,"
+ " line: %s" % line)
+ try:
+ data[1] = int(data[1])
+ data[2] = int(data[2])
+ data[3] = int(data[3])
+ data[5] = float(data[5])
+ except (TypeError, ValueError), err:
+ raise errors.HypervisorError("Can't parse output of xm list,"
+ " line: %s, error: %s" % (line, err))
+
+ # skip the Domain-0 (optional)
+ if include_node or data[0] != _DOM0_NAME:
+ result.append(data)
+
+ return result
+
+
+def _GetXmList(fn, include_node, _timeout=5):
+ """Return the list of running instances.
+
+ See L{_RunXmList} and L{_ParseXmList} for parameter details.
+
+ """
+ xmllist_errors = []
+ try:
+ lines = utils.Retry(_RunXmList, (0.3, 1.5, 1.0), _timeout,
+ args=(fn, xmllist_errors))
+ except utils.RetryTimeout:
+ if xmllist_errors:
+ xmlist_result = xmllist_errors.pop()
+
+ errmsg = ("xm list failed, timeout exceeded (%s): %s" %
+ (xmlist_result.fail_reason, xmlist_result.output))
+ else:
+ errmsg = "xm list failed"
+
+ raise errors.HypervisorError(errmsg)
+
+ return _ParseXmList(lines, include_node)
+
+
+def _ParseNodeInfo(info):
+ """Return information about the node.
+
+ @return: a dict with the following keys (memory values in MiB):
+ - memory_total: the total memory size on the node
+ - memory_free: the available memory on the node for instances
+ - nr_cpus: total number of CPUs
+ - nr_nodes: in a NUMA system, the number of domains
+ - nr_sockets: the number of physical CPU sockets in the node
+ - hv_version: the hypervisor version in the form (major, minor)
+
+ """
+ result = {}
+ cores_per_socket = threads_per_core = nr_cpus = None
+ xen_major, xen_minor = None, None
+ memory_total = None
+ memory_free = None
+
+ for line in info.splitlines():
+ fields = line.split(":", 1)
+
+ if len(fields) < 2:
+ continue
+
+ (key, val) = map(lambda s: s.strip(), fields)
+
+ # Note: in Xen 3, memory has changed to total_memory
+ if key in ("memory", "total_memory"):
+ memory_total = int(val)
+ elif key == "free_memory":
+ memory_free = int(val)
+ elif key == "nr_cpus":
+ nr_cpus = result["cpu_total"] = int(val)
+ elif key == "nr_nodes":
+ result["cpu_nodes"] = int(val)
+ elif key == "cores_per_socket":
+ cores_per_socket = int(val)
+ elif key == "threads_per_core":
+ threads_per_core = int(val)
+ elif key == "xen_major":
+ xen_major = int(val)
+ elif key == "xen_minor":
+ xen_minor = int(val)
+
+ if None not in [cores_per_socket, threads_per_core, nr_cpus]:
+ result["cpu_sockets"] = nr_cpus / (cores_per_socket * threads_per_core)
+
+ if memory_free is not None:
+ result["memory_free"] = memory_free
+
+ if memory_total is not None:
+ result["memory_total"] = memory_total
+
+ if not (xen_major is None or xen_minor is None):
+ result[constants.HV_NODEINFO_KEY_VERSION] = (xen_major, xen_minor)
+
+ return result
+
+
+def _MergeInstanceInfo(info, fn):
+ """Updates node information from L{_ParseNodeInfo} with instance info.
+
+ @type info: dict
+ @param info: Result from L{_ParseNodeInfo}
+ @type fn: callable
+ @param fn: Function returning result of running C{xm list}
+ @rtype: dict
+
+ """
+ total_instmem = 0
+
+ for (name, _, mem, vcpus, _, _) in fn(True):
+ if name == _DOM0_NAME:
+ info["memory_dom0"] = mem
+ info["dom0_cpus"] = vcpus
+
+ # Include Dom0 in total memory usage
+ total_instmem += mem
+
+ memory_free = info.get("memory_free")
+ memory_total = info.get("memory_total")
+
+ # Calculate memory used by hypervisor
+ if None not in [memory_total, memory_free, total_instmem]:
+ info["memory_hv"] = memory_total - memory_free - total_instmem
+
+ return info
+
+
+def _GetNodeInfo(info, fn):
+ """Combines L{_MergeInstanceInfo} and L{_ParseNodeInfo}.
+
+ """
+ return _MergeInstanceInfo(_ParseNodeInfo(info), fn)
+
+
+def _GetConfigFileDiskData(block_devices, blockdev_prefix,
+ _letters=_DISK_LETTERS):
+ """Get disk directives for Xen config file.
+
+ This method builds the xen config disk directive according to the
+ given disk_template and block_devices.
+
+ @param block_devices: list of tuples (cfdev, rldev):
+ - cfdev: dict containing ganeti config disk part
+ - rldev: ganeti.bdev.BlockDev object
+ @param blockdev_prefix: a string containing blockdevice prefix,
+ e.g. "sd" for /dev/sda
+
+ @return: string containing disk directive for xen instance config file
+
+ """
+ if len(block_devices) > len(_letters):
+ raise errors.HypervisorError("Too many disks")
+
+ disk_data = []
+
+ for sd_suffix, (cfdev, dev_path) in zip(_letters, block_devices):
+ sd_name = blockdev_prefix + sd_suffix
+
+ if cfdev.mode == constants.DISK_RDWR:
+ mode = "w"
+ else:
+ mode = "r"
+
+ if cfdev.dev_type == constants.LD_FILE:
+ driver = _FILE_DRIVER_MAP[cfdev.physical_id[0]]
+ else:
+ driver = "phy"
+
+ disk_data.append("'%s:%s,%s,%s'" % (driver, dev_path, sd_name, mode))
+
+ return disk_data
+
+
class XenHypervisor(hv_base.BaseHypervisor):
"""Xen generic hypervisor interface
XL_CONFIG_FILE,
]
- @staticmethod
- def _ConfigFileName(instance_name):
+ def __init__(self, _cfgdir=None, _run_cmd_fn=None, _cmd=None):
+ hv_base.BaseHypervisor.__init__(self)
+
+ if _cfgdir is None:
+ self._cfgdir = pathutils.XEN_CONFIG_DIR
+ else:
+ self._cfgdir = _cfgdir
+
+ if _run_cmd_fn is None:
+ self._run_cmd_fn = utils.RunCmd
+ else:
+ self._run_cmd_fn = _run_cmd_fn
+
+ self._cmd = _cmd
+
+ def _GetCommand(self):
+ """Returns Xen command to use.
+
+ """
+ if self._cmd is None:
+ # TODO: Make command a hypervisor parameter
+ cmd = constants.XEN_CMD
+ else:
+ cmd = self._cmd
+
+ if cmd not in constants.KNOWN_XEN_COMMANDS:
+ raise errors.ProgrammerError("Unknown Xen command '%s'" % cmd)
+
+ return cmd
+
+ def _RunXen(self, args):
+ """Wrapper around L{utils.process.RunCmd} to run Xen command.
+
+ @see: L{utils.process.RunCmd}
+
+ """
+ cmd = [self._GetCommand()]
+ cmd.extend(args)
+
+ return self._run_cmd_fn(cmd)
+
+ def _ConfigFileName(self, instance_name):
"""Get the config file name for an instance.
@param instance_name: instance name
@rtype: str
"""
- return utils.PathJoin(pathutils.XEN_CONFIG_DIR, instance_name)
+ return utils.PathJoin(self._cfgdir, instance_name)
@classmethod
- def _WriteConfigFile(cls, instance, startup_memory, block_devices):
- """Write the Xen config file for the instance.
+ def _GetConfig(cls, instance, startup_memory, block_devices):
+ """Build Xen configuration for an instance.
"""
raise NotImplementedError
- @staticmethod
- def _WriteConfigFileStatic(instance_name, data):
+ def _WriteConfigFile(self, instance_name, data):
"""Write the Xen config file for the instance.
This version of the function just writes the config file from static data.
"""
# just in case it exists
- utils.RemoveFile(utils.PathJoin(pathutils.XEN_CONFIG_DIR, "auto",
- instance_name))
+ utils.RemoveFile(utils.PathJoin(self._cfgdir, "auto", instance_name))
- cfg_file = XenHypervisor._ConfigFileName(instance_name)
+ cfg_file = self._ConfigFileName(instance_name)
try:
utils.WriteFile(cfg_file, data=data)
except EnvironmentError, err:
raise errors.HypervisorError("Cannot write Xen instance configuration"
" file %s: %s" % (cfg_file, err))
- @staticmethod
- def _ReadConfigFile(instance_name):
+ def _ReadConfigFile(self, instance_name):
"""Returns the contents of the instance config file.
"""
- filename = XenHypervisor._ConfigFileName(instance_name)
+ filename = self._ConfigFileName(instance_name)
try:
file_content = utils.ReadFile(filename)
return file_content
- @staticmethod
- def _RemoveConfigFile(instance_name):
+ def _RemoveConfigFile(self, instance_name):
"""Remove the xen configuration file.
"""
- utils.RemoveFile(XenHypervisor._ConfigFileName(instance_name))
+ utils.RemoveFile(self._ConfigFileName(instance_name))
- @staticmethod
- def _RunXmList(xmlist_errors):
- """Helper function for L{_GetXMList} to run "xm list".
+ def _GetXmList(self, include_node):
+ """Wrapper around module level L{_GetXmList}.
"""
- result = utils.RunCmd([constants.XEN_CMD, "list"])
- if result.failed:
- logging.error("xm list failed (%s): %s", result.fail_reason,
- result.output)
- xmlist_errors.append(result)
- raise utils.RetryAgain()
-
- # skip over the heading
- return result.stdout.splitlines()[1:]
-
- @classmethod
- def _GetXMList(cls, include_node):
- """Return the list of running instances.
-
- If the include_node argument is True, then we return information
- for dom0 also, otherwise we filter that from the return value.
-
- @return: list of (name, id, memory, vcpus, state, time spent)
-
- """
- xmlist_errors = []
- try:
- lines = utils.Retry(cls._RunXmList, 1, 5, args=(xmlist_errors, ))
- except utils.RetryTimeout:
- if xmlist_errors:
- xmlist_result = xmlist_errors.pop()
-
- errmsg = ("xm list failed, timeout exceeded (%s): %s" %
- (xmlist_result.fail_reason, xmlist_result.output))
- else:
- errmsg = "xm list failed"
-
- raise errors.HypervisorError(errmsg)
-
- result = []
- for line in lines:
- # The format of lines is:
- # Name ID Mem(MiB) VCPUs State Time(s)
- # Domain-0 0 3418 4 r----- 266.2
- data = line.split()
- if len(data) != 6:
- raise errors.HypervisorError("Can't parse output of xm list,"
- " line: %s" % line)
- try:
- data[1] = int(data[1])
- data[2] = int(data[2])
- data[3] = int(data[3])
- data[5] = float(data[5])
- except (TypeError, ValueError), err:
- raise errors.HypervisorError("Can't parse output of xm list,"
- " line: %s, error: %s" % (line, err))
-
- # skip the Domain-0 (optional)
- if include_node or data[0] != _DOM0_NAME:
- result.append(data)
-
- return result
+ return _GetXmList(lambda: self._RunXen(["list"]), include_node)
def ListInstances(self):
"""Get the list of running instances.
"""
- xm_list = self._GetXMList(False)
+ xm_list = self._GetXmList(False)
names = [info[0] for info in xm_list]
return names
@return: tuple (name, id, memory, vcpus, stat, times)
"""
- xm_list = self._GetXMList(instance_name == _DOM0_NAME)
+ xm_list = self._GetXmList(instance_name == _DOM0_NAME)
result = None
for data in xm_list:
if data[0] == instance_name:
@return: list of tuples (name, id, memory, vcpus, stat, times)
"""
- xm_list = self._GetXMList(False)
+ xm_list = self._GetXmList(False)
return xm_list
+ def _MakeConfigFile(self, instance, startup_memory, block_devices):
+ """Gather configuration details and write to disk.
+
+ See L{_GetConfig} for arguments.
+
+ """
+ buf = StringIO()
+ buf.write("# Automatically generated by Ganeti. Do not edit!\n")
+ buf.write("\n")
+ buf.write(self._GetConfig(instance, startup_memory, block_devices))
+ buf.write("\n")
+
+ self._WriteConfigFile(instance.name, buf.getvalue())
+
def StartInstance(self, instance, block_devices, startup_paused):
"""Start an instance.
"""
startup_memory = self._InstanceStartupMemory(instance)
- self._WriteConfigFile(instance, startup_memory, block_devices)
- cmd = [constants.XEN_CMD, "create"]
+
+ self._MakeConfigFile(instance, startup_memory, block_devices)
+
+ cmd = ["create"]
if startup_paused:
- cmd.extend(["-p"])
- cmd.extend([self._ConfigFileName(instance.name)])
- result = utils.RunCmd(cmd)
+ cmd.append("-p")
+ cmd.append(self._ConfigFileName(instance.name))
+ result = self._RunXen(cmd)
if result.failed:
raise errors.HypervisorError("Failed to start instance %s: %s (%s)" %
(instance.name, result.fail_reason,
if name is None:
name = instance.name
+ return self._StopInstance(name, force)
+
+ def _StopInstance(self, name, force):
+ """Stop an instance.
+
+ """
if force:
- command = [constants.XEN_CMD, "destroy", name]
+ action = "destroy"
else:
- command = [constants.XEN_CMD, "shutdown", name]
- result = utils.RunCmd(command)
+ action = "shutdown"
+ result = self._RunXen([action, name])
if result.failed:
raise errors.HypervisorError("Failed to stop instance %s: %s, %s" %
(name, result.fail_reason, result.output))
raise errors.HypervisorError("Failed to reboot instance %s,"
" not running" % instance.name)
- result = utils.RunCmd([constants.XEN_CMD, "reboot", instance.name])
+ result = self._RunXen(["reboot", instance.name])
if result.failed:
raise errors.HypervisorError("Failed to reboot instance %s: %s, %s" %
(instance.name, result.fail_reason,
@param mem: actual memory size to use for instance runtime
"""
- cmd = [constants.XEN_CMD, "mem-set", instance.name, mem]
- result = utils.RunCmd(cmd)
+ result = self._RunXen(["mem-set", instance.name, mem])
if result.failed:
raise errors.HypervisorError("Failed to balloon instance %s: %s (%s)" %
(instance.name, result.fail_reason,
result.output))
+
+ # Update configuration file
cmd = ["sed", "-ie", "s/^memory.*$/memory = %s/" % mem]
- cmd.append(XenHypervisor._ConfigFileName(instance.name))
+ cmd.append(self._ConfigFileName(instance.name))
+
result = utils.RunCmd(cmd)
if result.failed:
raise errors.HypervisorError("Failed to update memory for %s: %s (%s)" %
def GetNodeInfo(self):
"""Return information about the node.
- @return: a dict with the following keys (memory values in MiB):
- - memory_total: the total memory size on the node
- - memory_free: the available memory on the node for instances
- - memory_dom0: the memory used by the node itself, if available
- - nr_cpus: total number of CPUs
- - nr_nodes: in a NUMA system, the number of domains
- - nr_sockets: the number of physical CPU sockets in the node
- - hv_version: the hypervisor version in the form (major, minor)
+ @see: L{_GetNodeInfo} and L{_ParseNodeInfo}
"""
- result = utils.RunCmd([constants.XEN_CMD, "info"])
+ result = self._RunXen(["info"])
if result.failed:
logging.error("Can't run 'xm info' (%s): %s", result.fail_reason,
result.output)
return None
- xmoutput = result.stdout.splitlines()
- result = {}
- cores_per_socket = threads_per_core = nr_cpus = None
- xen_major, xen_minor = None, None
- memory_total = None
- memory_free = None
-
- for line in xmoutput:
- splitfields = line.split(":", 1)
-
- if len(splitfields) > 1:
- key = splitfields[0].strip()
- val = splitfields[1].strip()
-
- # note: in xen 3, memory has changed to total_memory
- if key == "memory" or key == "total_memory":
- memory_total = int(val)
- elif key == "free_memory":
- memory_free = int(val)
- elif key == "nr_cpus":
- nr_cpus = result["cpu_total"] = int(val)
- elif key == "nr_nodes":
- result["cpu_nodes"] = int(val)
- elif key == "cores_per_socket":
- cores_per_socket = int(val)
- elif key == "threads_per_core":
- threads_per_core = int(val)
- elif key == "xen_major":
- xen_major = int(val)
- elif key == "xen_minor":
- xen_minor = int(val)
-
- if None not in [cores_per_socket, threads_per_core, nr_cpus]:
- result["cpu_sockets"] = nr_cpus / (cores_per_socket * threads_per_core)
-
- total_instmem = 0
- for (name, _, mem, vcpus, _, _) in self._GetXMList(True):
- if name == _DOM0_NAME:
- result["memory_dom0"] = mem
- result["dom0_cpus"] = vcpus
-
- # Include Dom0 in total memory usage
- total_instmem += mem
-
- if memory_free is not None:
- result["memory_free"] = memory_free
-
- if memory_total is not None:
- result["memory_total"] = memory_total
-
- # Calculate memory used by hypervisor
- if None not in [memory_total, memory_free, total_instmem]:
- result["memory_hv"] = memory_total - memory_free - total_instmem
-
- if not (xen_major is None or xen_minor is None):
- result[constants.HV_NODEINFO_KEY_VERSION] = (xen_major, xen_minor)
-
- return result
+ return _GetNodeInfo(result.stdout, self._GetXmList)
@classmethod
def GetInstanceConsole(cls, instance, hvparams, beparams):
@return: Problem description if something is wrong, C{None} otherwise
"""
- result = utils.RunCmd([constants.XEN_CMD, "info"])
+ result = self._RunXen(["info"])
if result.failed:
return "'xm info' failed: %s, %s" % (result.fail_reason, result.output)
return None
- @staticmethod
- def _GetConfigFileDiskData(block_devices, blockdev_prefix):
- """Get disk directive for xen config file.
-
- This method builds the xen config disk directive according to the
- given disk_template and block_devices.
-
- @param block_devices: list of tuples (cfdev, rldev):
- - cfdev: dict containing ganeti config disk part
- - rldev: ganeti.bdev.BlockDev object
- @param blockdev_prefix: a string containing blockdevice prefix,
- e.g. "sd" for /dev/sda
-
- @return: string containing disk directive for xen instance config file
-
- """
- FILE_DRIVER_MAP = {
- constants.FD_LOOP: "file",
- constants.FD_BLKTAP: "tap:aio",
- }
- disk_data = []
- if len(block_devices) > 24:
- # 'z' - 'a' = 24
- raise errors.HypervisorError("Too many disks")
- namespace = [blockdev_prefix + chr(i + ord("a")) for i in range(24)]
- for sd_name, (cfdev, dev_path) in zip(namespace, block_devices):
- if cfdev.mode == constants.DISK_RDWR:
- mode = "w"
- else:
- mode = "r"
- if cfdev.dev_type == constants.LD_FILE:
- line = "'%s:%s,%s,%s'" % (FILE_DRIVER_MAP[cfdev.physical_id[0]],
- dev_path, sd_name, mode)
- else:
- line = "'phy:%s,%s,%s'" % (dev_path, sd_name, mode)
- disk_data.append(line)
-
- return disk_data
-
def MigrationInfo(self, instance):
"""Get instance information to perform a migration.
"""
if success:
- self._WriteConfigFileStatic(instance.name, info)
+ self._WriteConfigFile(instance.name, info)
def MigrateInstance(self, instance, target, live):
"""Migrate an instance to a target node.
@param live: perform a live migration
"""
- if self.GetInstanceInfo(instance.name) is None:
+ port = instance.hvparams[constants.HV_MIGRATION_PORT]
+
+ # TODO: Pass cluster name via RPC
+ cluster_name = ssconf.SimpleStore().GetClusterName()
+
+ return self._MigrateInstance(cluster_name, instance.name, target, port,
+ live)
+
+ def _MigrateInstance(self, cluster_name, instance_name, target, port, live,
+ _ping_fn=netutils.TcpPing):
+ """Migrate an instance to a target node.
+
+ @see: L{MigrateInstance} for details
+
+ """
+ if self.GetInstanceInfo(instance_name) is None:
raise errors.HypervisorError("Instance not running, cannot migrate")
- port = instance.hvparams[constants.HV_MIGRATION_PORT]
+ cmd = self._GetCommand()
- if (constants.XEN_CMD == constants.XEN_CMD_XM and
- not netutils.TcpPing(target, port, live_port_needed=True)):
+ if (cmd == constants.XEN_CMD_XM and
+ not _ping_fn(target, port, live_port_needed=True)):
raise errors.HypervisorError("Remote host %s not listening on port"
" %s, cannot migrate" % (target, port))
- args = [constants.XEN_CMD, "migrate"]
- if constants.XEN_CMD == constants.XEN_CMD_XM:
+ args = ["migrate"]
+
+ if cmd == constants.XEN_CMD_XM:
args.extend(["-p", "%d" % port])
if live:
args.append("-l")
- elif constants.XEN_CMD == constants.XEN_CMD_XL:
- cluster_name = ssconf.SimpleStore().GetClusterName()
- args.extend(["-s", constants.XL_SSH_CMD % cluster_name])
- args.extend(["-C", self._ConfigFileName(instance.name)])
+
+ elif cmd == constants.XEN_CMD_XL:
+ args.extend([
+ "-s", constants.XL_SSH_CMD % cluster_name,
+ "-C", self._ConfigFileName(instance_name),
+ ])
+
else:
- raise errors.HypervisorError("Unsupported xen command: %s" %
- constants.XEN_CMD)
+ raise errors.HypervisorError("Unsupported Xen command: %s" % self._cmd)
- args.extend([instance.name, target])
- result = utils.RunCmd(args)
+ args.extend([instance_name, target])
+
+ result = self._RunXen(args)
if result.failed:
raise errors.HypervisorError("Failed to migrate instance %s: %s" %
- (instance.name, result.output))
+ (instance_name, result.output))
def FinalizeMigrationSource(self, instance, success, live):
"""Finalize the instance migration on the source node.
(False, lambda x: 0 < x < 65536, "invalid weight", None, None),
}
- @classmethod
- def _WriteConfigFile(cls, instance, startup_memory, block_devices):
+ def _GetConfig(self, instance, startup_memory, block_devices):
"""Write the Xen config file for the instance.
"""
nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK]
vif_data.append("'%s'" % nic_str)
- disk_data = cls._GetConfigFileDiskData(block_devices,
- hvp[constants.HV_BLOCKDEV_PREFIX])
+ disk_data = \
+ _GetConfigFileDiskData(block_devices, hvp[constants.HV_BLOCKDEV_PREFIX])
config.write("vif = [%s]\n" % ",".join(vif_data))
config.write("disk = [%s]\n" % ",".join(disk_data))
config.write("on_reboot = 'destroy'\n")
config.write("on_crash = 'restart'\n")
config.write("extra = '%s'\n" % hvp[constants.HV_KERNEL_ARGS])
- cls._WriteConfigFileStatic(instance.name, config.getvalue())
- return True
+ return config.getvalue()
class XenHvmHypervisor(XenHypervisor):
(False, lambda x: 0 < x < 65535, "invalid weight", None, None),
}
- @classmethod
- def _WriteConfigFile(cls, instance, startup_memory, block_devices):
+ def _GetConfig(self, instance, startup_memory, block_devices):
"""Create a Xen 3.1 HVM config file.
"""
hvp = instance.hvparams
config = StringIO()
- config.write("# this is autogenerated by Ganeti, please do not edit\n#\n")
# kernel handling
kpath = hvp[constants.HV_KERNEL_PATH]
config.write("vif = [%s]\n" % ",".join(vif_data))
- disk_data = cls._GetConfigFileDiskData(block_devices,
- hvp[constants.HV_BLOCKDEV_PREFIX])
+ disk_data = \
+ _GetConfigFileDiskData(block_devices, hvp[constants.HV_BLOCKDEV_PREFIX])
iso_path = hvp[constants.HV_CDROM_IMAGE_PATH]
if iso_path:
else:
config.write("on_reboot = 'destroy'\n")
config.write("on_crash = 'restart'\n")
- cls._WriteConfigFileStatic(instance.name, config.getvalue())
- return True
+ return config.getvalue()
from ganeti import errors
+IPV4_NETWORK_MIN_SIZE = 30
+IPV4_NETWORK_MIN_NUM_HOSTS = 2 ** (32 - IPV4_NETWORK_MIN_SIZE)
+
class AddressPool(object):
"""Address pool class, wrapping an C{objects.Network} object.
self.net = network
self.network = ipaddr.IPNetwork(self.net.network)
+ if self.network.numhosts < IPV4_NETWORK_MIN_NUM_HOSTS:
+ raise errors.AddressPoolError("A network with only %s host(s) is too"
+ " small, please specify at least a /%s"
+ " network" %
+ (str(self.network.numhosts),
+ IPV4_NETWORK_MIN_SIZE))
if self.net.gateway:
self.gateway = ipaddr.IPAddress(self.net.gateway)
obj = cls(**val_str) # pylint: disable=W0142
return obj
- @staticmethod
- def _ContainerToDicts(container):
- """Convert the elements of a container to standard python types.
-
- This method converts a container with elements derived from
- ConfigData to standard python types. If the container is a dict,
- we don't touch the keys, only the values.
-
- """
- if isinstance(container, dict):
- ret = dict([(k, v.ToDict()) for k, v in container.iteritems()])
- elif isinstance(container, (list, tuple, set, frozenset)):
- ret = [elem.ToDict() for elem in container]
- else:
- raise TypeError("Invalid type %s passed to _ContainerToDicts" %
- type(container))
- return ret
-
- @staticmethod
- def _ContainerFromDicts(source, c_type, e_type):
- """Convert a container from standard python types.
-
- This method converts a container with standard python types to
- ConfigData objects. If the container is a dict, we don't touch the
- keys, only the values.
-
- """
- if not isinstance(c_type, type):
- raise TypeError("Container type %s passed to _ContainerFromDicts is"
- " not a type" % type(c_type))
- if source is None:
- source = c_type()
- if c_type is dict:
- ret = dict([(k, e_type.FromDict(v)) for k, v in source.iteritems()])
- elif c_type in (list, tuple, set, frozenset):
- ret = c_type([e_type.FromDict(elem) for elem in source])
- else:
- raise TypeError("Invalid container type %s passed to"
- " _ContainerFromDicts" % c_type)
- return ret
-
def Copy(self):
"""Makes a deep copy of the current object and its children.
mydict = super(ConfigData, self).ToDict()
mydict["cluster"] = mydict["cluster"].ToDict()
for key in "nodes", "instances", "nodegroups", "networks":
- mydict[key] = self._ContainerToDicts(mydict[key])
+ mydict[key] = outils.ContainerToDicts(mydict[key])
return mydict
"""
obj = super(ConfigData, cls).FromDict(val)
obj.cluster = Cluster.FromDict(obj.cluster)
- obj.nodes = cls._ContainerFromDicts(obj.nodes, dict, Node)
- obj.instances = cls._ContainerFromDicts(obj.instances, dict, Instance)
- obj.nodegroups = cls._ContainerFromDicts(obj.nodegroups, dict, NodeGroup)
- obj.networks = cls._ContainerFromDicts(obj.networks, dict, Network)
+ obj.nodes = outils.ContainerFromDicts(obj.nodes, dict, Node)
+ obj.instances = \
+ outils.ContainerFromDicts(obj.instances, dict, Instance)
+ obj.nodegroups = \
+ outils.ContainerFromDicts(obj.nodegroups, dict, NodeGroup)
+ obj.networks = outils.ContainerFromDicts(obj.networks, dict, Network)
return obj
def HasAnyDiskOfType(self, dev_type):
for attr in ("children",):
alist = bo.get(attr, None)
if alist:
- bo[attr] = self._ContainerToDicts(alist)
+ bo[attr] = outils.ContainerToDicts(alist)
return bo
@classmethod
"""
obj = super(Disk, cls).FromDict(val)
if obj.children:
- obj.children = cls._ContainerFromDicts(obj.children, list, Disk)
+ obj.children = outils.ContainerFromDicts(obj.children, list, Disk)
if obj.logical_id and isinstance(obj.logical_id, list):
obj.logical_id = tuple(obj.logical_id)
if obj.physical_id and isinstance(obj.physical_id, list):
for attr in "nics", "disks":
alist = bo.get(attr, None)
if alist:
- nlist = self._ContainerToDicts(alist)
+ nlist = outils.ContainerToDicts(alist)
else:
nlist = []
bo[attr] = nlist
if "admin_up" in val:
del val["admin_up"]
obj = super(Instance, cls).FromDict(val)
- obj.nics = cls._ContainerFromDicts(obj.nics, list, NIC)
- obj.disks = cls._ContainerFromDicts(obj.disks, list, Disk)
+ obj.nics = outils.ContainerFromDicts(obj.nics, list, NIC)
+ obj.disks = outils.ContainerFromDicts(obj.disks, list, Disk)
return obj
def UpgradeConfig(self):
hv_state = data.get("hv_state", None)
if hv_state is not None:
- data["hv_state"] = self._ContainerToDicts(hv_state)
+ data["hv_state"] = outils.ContainerToDicts(hv_state)
disk_state = data.get("disk_state", None)
if disk_state is not None:
data["disk_state"] = \
- dict((key, self._ContainerToDicts(value))
+ dict((key, outils.ContainerToDicts(value))
for (key, value) in disk_state.items())
return data
obj = super(Node, cls).FromDict(val)
if obj.hv_state is not None:
- obj.hv_state = cls._ContainerFromDicts(obj.hv_state, dict, NodeHvState)
+ obj.hv_state = \
+ outils.ContainerFromDicts(obj.hv_state, dict, NodeHvState)
if obj.disk_state is not None:
obj.disk_state = \
- dict((key, cls._ContainerFromDicts(value, dict, NodeDiskState))
+ dict((key, outils.ContainerFromDicts(value, dict, NodeDiskState))
for (key, value) in obj.disk_state.items())
return obj
"""
mydict = super(Cluster, self).ToDict()
- mydict["tcpudp_port_pool"] = list(self.tcpudp_port_pool)
+
+ if self.tcpudp_port_pool is None:
+ tcpudp_port_pool = []
+ else:
+ tcpudp_port_pool = list(self.tcpudp_port_pool)
+
+ mydict["tcpudp_port_pool"] = tcpudp_port_pool
+
return mydict
@classmethod
"""
obj = super(Cluster, cls).FromDict(val)
- if not isinstance(obj.tcpudp_port_pool, set):
+
+ if obj.tcpudp_port_pool is None:
+ obj.tcpudp_port_pool = set()
+ elif not isinstance(obj.tcpudp_port_pool, set):
obj.tcpudp_port_pool = set(obj.tcpudp_port_pool)
+
return obj
def SimpleFillDP(self, diskparams):
"""
mydict = super(_QueryResponseBase, self).ToDict()
- mydict["fields"] = self._ContainerToDicts(mydict["fields"])
+ mydict["fields"] = outils.ContainerToDicts(mydict["fields"])
return mydict
@classmethod
"""
obj = super(_QueryResponseBase, cls).FromDict(val)
- obj.fields = cls._ContainerFromDicts(obj.fields, list, QueryFieldDefinition)
+ obj.fields = \
+ outils.ContainerFromDicts(obj.fields, list, QueryFieldDefinition)
return obj
This is used just for debugging and testing.
Parameters:
- - duration: the time to sleep
+ - duration: the time to sleep, in seconds
- on_master: if true, sleep on the master
- on_nodes: list of nodes in which to sleep
"""Module for object related utils."""
+#: Supported container types for serialization/de-serialization (must be a
+#: tuple as it's used as a parameter for C{isinstance})
+_SEQUENCE_TYPES = (list, tuple, set, frozenset)
+
+
class AutoSlots(type):
"""Meta base class for __slots__ definitions.
"""
raise NotImplementedError
+
+
+def ContainerToDicts(container):
+ """Convert the elements of a container to standard Python types.
+
+ This function converts the elements of a container to standard Python types. If
+ the input container is of the type C{dict}, only its values are touched.
+ Those values, as well as all elements of input sequences, must support a
+ C{ToDict} method returning a serialized version.
+
+ @type container: dict or sequence (see L{_SEQUENCE_TYPES})
+
+ """
+ if isinstance(container, dict):
+ ret = dict([(k, v.ToDict()) for k, v in container.items()])
+ elif isinstance(container, _SEQUENCE_TYPES):
+ ret = [elem.ToDict() for elem in container]
+ else:
+ raise TypeError("Unknown container type '%s'" % type(container))
+
+ return ret
+
+
+def ContainerFromDicts(source, c_type, e_type):
+ """Convert a container from standard Python types.
+
+ This function converts a container with standard Python types to objects. If
+ the container is a dict, we don't touch the keys, only the values.
+
+ @type source: None, dict or sequence (see L{_SEQUENCE_TYPES})
+ @param source: Input data
+ @type c_type: type class
+ @param c_type: Desired type for returned container
+ @type e_type: element type class
+ @param e_type: Item type for elements in returned container (must have a
+ C{FromDict} class method)
+
+ """
+ if not isinstance(c_type, type):
+ raise TypeError("Container type '%s' is not a type" % type(c_type))
+
+ if source is None:
+ source = c_type()
+
+ if c_type is dict:
+ ret = dict([(k, e_type.FromDict(v)) for k, v in source.items()])
+ elif c_type in _SEQUENCE_TYPES:
+ ret = c_type(map(e_type.FromDict, source))
+ else:
+ raise TypeError("Unknown container type '%s'" % c_type)
+
+ return ret
#
#
-# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
"""Returns a list of all nodes.
"""
- client = self.GetClient(query=False)
+ client = self.GetClient(query=True)
if self.useBulk():
bulkdata = client.QueryNodes([], N_FIELDS, False)
"""
node_name = self.items[0]
- client = self.GetClient(query=False)
+ client = self.GetClient(query=True)
result = baserlib.HandleItemQueryErrors(client.QueryNodes,
names=[node_name], fields=N_FIELDS,
"""Sleep for a fixed amount of time.
@type duration: float
- @param duration: the sleep duration
- @rtype: boolean
- @return: False for negative value, True otherwise
+ @param duration: the sleep duration, in seconds
+ @rtype: (boolean, str)
+ @return: False for a negative duration, together with an error message;
+ True otherwise (and the message is None)
"""
if duration < 0:
return _VIRT_HOSTNAME
-def _MakeNodeRoot(base, node_name):
+def MakeNodeRoot(base, node_name):
"""Appends a node name to the base directory.
"""
"""
if _basedir:
pure = _RemoveNodePrefix(filename, _noderoot=_noderoot)
- result = "%s/%s" % (_MakeNodeRoot(_basedir, node_name), pure)
+ result = "%s/%s" % (MakeNodeRoot(_basedir, node_name), pure)
else:
result = filename
"""
if _basedir:
return {
- _ROOTDIR_ENVNAME: _MakeNodeRoot(_basedir, hostname),
+ _ROOTDIR_ENVNAME: MakeNodeRoot(_basedir, hostname),
_HOSTNAME_ENVNAME: hostname,
}
else:
import qa_error
import qa_group
import qa_instance
+import qa_network
import qa_node
import qa_os
import qa_job
if qa_config.TestEnabled("instance-rename"):
tgt_instance = qa_config.AcquireInstance()
try:
- rename_source = instance["name"]
- rename_target = tgt_instance["name"]
+ rename_source = instance.name
+ rename_target = tgt_instance.name
# perform instance rename to the same name
RunTest(qa_instance.TestInstanceRenameAndBack,
rename_source, rename_source)
RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
rename_source, rename_target)
finally:
- qa_config.ReleaseInstance(tgt_instance)
+ tgt_instance.Release()
RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)
RunTestIf("group-list", qa_group.TestGroupListFields)
+def RunNetworkTests():
+ """Run tests for network management.
+
+ """
+ RunTestIf("network", qa_network.TestNetworkAddRemove)
+ RunTestIf("network", qa_network.TestNetworkConnect)
+
+
def RunGroupRwTests():
"""Run tests for adding/removing/renaming groups.
RunTest(qa_instance.TestInstanceStartup, newinst)
RunTest(qa_instance.TestInstanceRemove, newinst)
finally:
- qa_config.ReleaseInstance(newinst)
+ newinst.Release()
finally:
- qa_config.ReleaseNode(expnode)
+ expnode.Release()
if qa_config.TestEnabled(["rapi", "inter-cluster-instance-move"]):
newinst = qa_config.AcquireInstance()
RunTest(qa_rapi.TestInterClusterInstanceMove, instance, newinst,
inodes, tnode)
finally:
- qa_config.ReleaseNode(tnode)
+ tnode.Release()
finally:
- qa_config.ReleaseInstance(newinst)
+ newinst.Release()
def RunDaemonTests(instance):
if qa_config.TestEnabled("instance-add-plain-disk"):
# Make sure that the cluster doesn't have any pre-existing problem
qa_cluster.AssertClusterVerify()
+
+ # Create and allocate instances
instance1 = qa_instance.TestInstanceAddWithPlainDisk([node])
- instance2 = qa_instance.TestInstanceAddWithPlainDisk([node])
- # cluster-verify checks that disks are allocated correctly
- qa_cluster.AssertClusterVerify()
- qa_instance.TestInstanceRemove(instance1)
- qa_instance.TestInstanceRemove(instance2)
+ try:
+ instance2 = qa_instance.TestInstanceAddWithPlainDisk([node])
+ try:
+ # cluster-verify checks that disks are allocated correctly
+ qa_cluster.AssertClusterVerify()
+
+ # Remove instances
+ qa_instance.TestInstanceRemove(instance2)
+ qa_instance.TestInstanceRemove(instance1)
+ finally:
+ instance2.Release()
+ finally:
+ instance1.Release()
+
if qa_config.TestEnabled("instance-add-drbd-disk"):
snode = qa_config.AcquireNode()
try:
qa_cluster.TestSetExclStorCluster(False)
instance = qa_instance.TestInstanceAddWithDrbdDisk([node, snode])
- qa_cluster.TestSetExclStorCluster(True)
- exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
- qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
- qa_instance.TestInstanceRemove(instance)
+ try:
+ qa_cluster.TestSetExclStorCluster(True)
+ exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
+ qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
+ qa_instance.TestInstanceRemove(instance)
+ finally:
+ instance.Release()
finally:
- qa_config.ReleaseNode(snode)
+ snode.Release()
qa_cluster.TestSetExclStorCluster(old_es)
finally:
- qa_config.ReleaseNode(node)
+ node.Release()
def RunInstanceTests():
inodes = qa_config.AcquireManyNodes(num_nodes)
try:
instance = RunTest(create_fun, inodes)
-
- RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
- RunDaemonTests(instance)
- for node in inodes:
- RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node)
- if len(inodes) > 1:
- RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
- constants.INITIAL_NODE_GROUP_NAME,
- inodes[0]["primary"], inodes[1]["primary"])
- if qa_config.TestEnabled("instance-convert-disk"):
- RunTest(qa_instance.TestInstanceShutdown, instance)
- RunTest(qa_instance.TestInstanceConvertDiskToPlain, instance, inodes)
- RunTest(qa_instance.TestInstanceStartup, instance)
- RunCommonInstanceTests(instance)
- RunGroupListTests()
- RunExportImportTests(instance, inodes)
- RunHardwareFailureTests(instance, inodes)
- RunRepairDiskSizes()
- RunTest(qa_instance.TestInstanceRemove, instance)
+ try:
+ RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
+ RunDaemonTests(instance)
+ for node in inodes:
+ RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node)
+ if len(inodes) > 1:
+ RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
+ constants.INITIAL_NODE_GROUP_NAME,
+ inodes[0].primary, inodes[1].primary)
+ if qa_config.TestEnabled("instance-convert-disk"):
+ RunTest(qa_instance.TestInstanceShutdown, instance)
+ RunTest(qa_instance.TestInstanceConvertDiskToPlain,
+ instance, inodes)
+ RunTest(qa_instance.TestInstanceStartup, instance)
+ RunCommonInstanceTests(instance)
+ RunGroupListTests()
+ RunExportImportTests(instance, inodes)
+ RunHardwareFailureTests(instance, inodes)
+ RunRepairDiskSizes()
+ RunTest(qa_instance.TestInstanceRemove, instance)
+ finally:
+ instance.Release()
del instance
finally:
qa_config.ReleaseManyNodes(inodes)
RunCommonNodeTests()
RunGroupListTests()
RunGroupRwTests()
+ RunNetworkTests()
# The master shouldn't be readded or put offline; "delay" needs a non-master
# node to test
RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
RunTestIf("delay", qa_cluster.TestDelay, pnode)
finally:
- qa_config.ReleaseNode(pnode)
+ pnode.Release()
# Make sure the cluster is clean before running instance tests
qa_cluster.AssertClusterVerify()
for use_client in [True, False]:
rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
use_client)
- if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
- RunCommonInstanceTests(rapi_instance)
- RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
+ try:
+ if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
+ RunCommonInstanceTests(rapi_instance)
+ RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
+ finally:
+ rapi_instance.Release()
del rapi_instance
finally:
- qa_config.ReleaseNode(pnode)
+ pnode.Release()
config_list = [
("default-instance-tests", lambda: None, lambda _: None),
if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
for shutdown in [False, True]:
instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [pnode])
- expnode = qa_config.AcquireNode(exclude=pnode)
try:
- if shutdown:
- # Stop instance before exporting and removing it
- RunTest(qa_instance.TestInstanceShutdown, instance)
- RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
- RunTest(qa_instance.TestBackupList, expnode)
+ expnode = qa_config.AcquireNode(exclude=pnode)
+ try:
+ if shutdown:
+ # Stop instance before exporting and removing it
+ RunTest(qa_instance.TestInstanceShutdown, instance)
+ RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
+ RunTest(qa_instance.TestBackupList, expnode)
+ finally:
+ expnode.Release()
finally:
- qa_config.ReleaseNode(expnode)
+ instance.Release()
del expnode
del instance
qa_cluster.AssertClusterVerify()
finally:
- qa_config.ReleaseNode(pnode)
+ pnode.Release()
RunExclusiveStorageTests()
RunTest(qa_instance.TestRemoveInstanceOfflineNode, instance, snode,
set_offline, set_online)
finally:
- qa_config.ReleaseNode(pnode)
+ pnode.Release()
finally:
- qa_config.ReleaseNode(snode)
+ snode.Release()
qa_cluster.AssertClusterVerify()
RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)
parser.add_option("--yes-do-it", dest="yes_do_it",
action="store_true",
help="Really execute the tests")
- (qa_config.options, args) = parser.parse_args()
+ (opts, args) = parser.parse_args()
if len(args) == 1:
(config_file, ) = args
else:
parser.error("Wrong number of arguments.")
- if not qa_config.options.yes_do_it:
+ if not opts.yes_do_it:
print ("Executing this script irreversibly destroys any Ganeti\n"
"configuration on all nodes involved. If you really want\n"
"to start testing, supply the --yes-do-it option.")
qa_config.Load(config_file)
- primary = qa_config.GetMasterNode()["primary"]
+ primary = qa_config.GetMasterNode().primary
qa_utils.StartMultiplexer(primary)
print ("SSH command for primary node: %s" %
utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
"# Default network interface parameters": null,
"#default-nicparams": {
"mode": "bridged",
- "link": "xen-br0",
+ "link": "xen-br0"
},
"os": "debian-etch",
]
},
+ "networks": {
+ "inexistent-networks": [
+ "network1",
+ "network2",
+ "network3"
+ ]
+ },
+
"tests": {
"# Whether tests are enabled or disabled by default": null,
"default": true,
"group-list": true,
"group-rwops": true,
+ "network": false,
+
"node-list": true,
"node-info": true,
"node-volumes": true,
"""
cmd = utils.ShellQuoteArgs(["cat", filename])
for node in qa_config.get("nodes"):
- AssertEqual(qa_utils.GetCommandOutput(node["primary"], cmd), content)
+ AssertEqual(qa_utils.GetCommandOutput(node.primary, cmd), content)
# "gnt-cluster info" fields
"""
master = qa_config.GetMasterNode()
infocmd = "gnt-cluster info"
- info_out = qa_utils.GetCommandOutput(master["primary"], infocmd)
+ info_out = qa_utils.GetCommandOutput(master.primary, infocmd)
ret = None
for l in info_out.splitlines():
m = _CIFIELD_RE.match(l)
cvcmd = "gnt-cluster verify"
mnode = qa_config.GetMasterNode()
if errors:
- cvout = GetCommandOutput(mnode["primary"], cvcmd + " --error-codes",
+ cvout = GetCommandOutput(mnode.primary, cvcmd + " --error-codes",
fail=True)
actual = _GetCVErrorCodes(cvout)
expected = compat.UniqueFrozenset(e for (_, e, _) in errors)
fh.write("%s %s write\n" % (rapi_user, rapi_secret))
fh.flush()
- tmpru = qa_utils.UploadFile(master["primary"], fh.name)
+ tmpru = qa_utils.UploadFile(master.primary, fh.name)
try:
AssertCommand(["mkdir", "-p", rapi_dir])
AssertCommand(["mv", tmpru, pathutils.RAPI_USERS_FILE])
if spec:
cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, spec))
- if master.get("secondary", None):
- cmd.append("--secondary-ip=%s" % master["secondary"])
+ if master.secondary:
+ cmd.append("--secondary-ip=%s" % master.secondary)
vgname = qa_config.get("vg-name", None)
if vgname:
master = qa_config.GetMasterNode()
# Assert that OOB is unavailable for all nodes
- result_output = GetCommandOutput(master["primary"],
+ result_output = GetCommandOutput(master.primary,
"gnt-node list --verbose --no-headers -o"
" powered")
AssertEqual(compat.all(powered == "(unavail)"
AssertCommand(["gnt-cluster", "epo", "--all", "some_arg"], fail=True)
# Unless --all is given master is not allowed to be in the list
- AssertCommand(["gnt-cluster", "epo", "-f", master["primary"]], fail=True)
+ AssertCommand(["gnt-cluster", "epo", "-f", master.primary], fail=True)
# This shouldn't fail
AssertCommand(["gnt-cluster", "epo", "-f", "--all"])
# All instances should have been stopped now
- result_output = GetCommandOutput(master["primary"],
+ result_output = GetCommandOutput(master.primary,
"gnt-instance list --no-headers -o status")
# ERROR_down because the instance is stopped but not recorded as such
AssertEqual(compat.all(status == "ERROR_down"
AssertCommand(["gnt-cluster", "epo", "--on", "-f", "--all"])
# All instances should have been started now
- result_output = GetCommandOutput(master["primary"],
+ result_output = GetCommandOutput(master.primary,
"gnt-instance list --no-headers -o status")
AssertEqual(compat.all(status == "running"
for status in result_output.splitlines()), True)
AssertCommand(["gnt-debug", "delay", "1"])
AssertCommand(["gnt-debug", "delay", "--no-master", "1"])
AssertCommand(["gnt-debug", "delay", "--no-master",
- "-n", node["primary"], "1"])
+ "-n", node.primary, "1"])
def TestClusterReservedLvs():
"--rapi-certificate=/dev/null"]
AssertCommand(cmd, fail=True)
- rapi_cert_backup = qa_utils.BackupFile(master["primary"],
+ rapi_cert_backup = qa_utils.BackupFile(master.primary,
pathutils.RAPI_CERT_FILE)
try:
# Custom RAPI certificate
utils.GenerateSelfSignedSslCert(fh.name, validity=validity)
- tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
+ tmpcert = qa_utils.UploadFile(master.primary, fh.name)
try:
AssertCommand(["gnt-cluster", "renew-crypto", "--force",
"--rapi-certificate=%s" % tmpcert])
cds_fh.write("\n")
cds_fh.flush()
- tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
+ tmpcds = qa_utils.UploadFile(master.primary, cds_fh.name)
try:
AssertCommand(["gnt-cluster", "renew-crypto", "--force",
"--cluster-domain-secret=%s" % tmpcds])
master = qa_config.GetMasterNode()
options = qa_config.get("options", {})
- disk_template = options.get("burnin-disk-template", "drbd")
+ disk_template = options.get("burnin-disk-template", constants.DT_DRBD8)
parallel = options.get("burnin-in-parallel", False)
check_inst = options.get("burnin-check-instances", False)
do_rename = options.get("burnin-rename", "")
if len(instances) < 1:
raise qa_error.Error("Burnin needs at least one instance")
- script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
+ script = qa_utils.UploadFile(master.primary, "../tools/burnin")
try:
# Run burnin
cmd = [script,
cmd.append("--no-reboot")
else:
cmd.append("--reboot-types=%s" % ",".join(reboot_types))
- cmd += [inst["name"] for inst in instances]
+ cmd += [inst.name for inst in instances]
AssertCommand(cmd)
finally:
AssertCommand(["rm", "-f", script])
finally:
for inst in instances:
- qa_config.ReleaseInstance(inst)
+ inst.Release()
def TestClusterMasterFailover():
# Back to original master node
AssertCommand(cmd, node=master)
finally:
- qa_config.ReleaseNode(failovermaster)
+ failovermaster.Release()
def TestClusterMasterFailoverWithDrainedQueue():
# Back to original master node
AssertCommand(cmd, node=master)
finally:
- qa_config.ReleaseNode(failovermaster)
+ failovermaster.Release()
AssertCommand(drain_check, fail=True)
AssertCommand(drain_check, node=failovermaster, fail=True)
f.seek(0)
# Upload file to master node
- testname = qa_utils.UploadFile(master["primary"], f.name)
+ testname = qa_utils.UploadFile(master.primary, f.name)
try:
# Copy file to all nodes
AssertCommand(["gnt-cluster", "copyfile", testname])
vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
lvname1 = _QA_LV_PREFIX + "vol1"
lvname2 = _QA_LV_PREFIX + "vol2"
- node_name = node["primary"]
+ node_name = node.primary
AssertCommand(["lvcreate", "-L1G", "-n", lvname1, vgname], node=node_name)
AssertClusterVerify(fail=True, errors=[constants.CV_ENODEORPHANLV])
AssertCommand(["lvcreate", "-L1G", "-n", lvname2, vgname], node=node_name)
_INSTANCE_CHECK_KEY = "instance-check"
_ENABLED_HV_KEY = "enabled-hypervisors"
-# Key to store the cluster-wide run-time value of the exclusive storage flag
-_EXCLUSIVE_STORAGE_KEY = "_exclusive_storage"
+#: QA configuration (L{_QaConfig})
+_config = None
-cfg = {}
-options = None
+class _QaInstance(object):
+ __slots__ = [
+ "name",
+ "nicmac",
+ "used",
+ "_disk_template",
+ ]
-def Load(path):
- """Loads the passed configuration file.
+ def __init__(self, name, nicmac):
+ """Initializes instances of this class.
+
+ """
+ self.name = name
+ self.nicmac = nicmac
+ self.used = None
+ self._disk_template = None
+
+ @classmethod
+ def FromDict(cls, data):
+ """Creates instance object from JSON dictionary.
+
+ """
+ nicmac = []
+
+ macaddr = data.get("nic.mac/0")
+ if macaddr:
+ nicmac.append(macaddr)
+
+ return cls(name=data["name"], nicmac=nicmac)
+
+ def Release(self):
+ """Releases instance and makes it available again.
+
+ """
+ assert self.used, \
+ ("Instance '%s' was never acquired or released more than once" %
+ self.name)
+
+ self.used = False
+ self._disk_template = None
+
+ def GetNicMacAddr(self, idx, default):
+ """Returns MAC address for NIC.
+
+ @type idx: int
+ @param idx: NIC index
+ @param default: Default value
+
+ """
+ if len(self.nicmac) > idx:
+ return self.nicmac[idx]
+ else:
+ return default
+
+ def SetDiskTemplate(self, template):
+ """Set the disk template.
+
+ """
+ assert template in constants.DISK_TEMPLATES
+
+ self._disk_template = template
+
+ @property
+ def disk_template(self):
+ """Returns the current disk template.
+
+ """
+ return self._disk_template
+
+
+class _QaNode(object):
+ __slots__ = [
+ "primary",
+ "secondary",
+ "_added",
+ "_use_count",
+ ]
+
+ def __init__(self, primary, secondary):
+ """Initializes instances of this class.
+
+ """
+ self.primary = primary
+ self.secondary = secondary
+ self._added = False
+ self._use_count = 0
+
+ @classmethod
+ def FromDict(cls, data):
+ """Creates node object from JSON dictionary.
+
+ """
+ return cls(primary=data["primary"], secondary=data.get("secondary"))
+
+ def Use(self):
+ """Marks a node as being in use.
+
+ """
+ assert self._use_count >= 0
+
+ self._use_count += 1
+
+ return self
+
+ def Release(self):
+ """Release a node (opposite of L{Use}).
+
+ """
+ assert self.use_count > 0
+
+ self._use_count -= 1
+
+ def MarkAdded(self):
+ """Marks node as having been added to a cluster.
+
+ """
+ assert not self._added
+ self._added = True
+
+ def MarkRemoved(self):
+ """Marks node as having been removed from a cluster.
+
+ """
+ assert self._added
+ self._added = False
+
+ @property
+ def added(self):
+ """Returns whether a node is part of a cluster.
+
+ """
+ return self._added
+
+ @property
+ def use_count(self):
+ """Returns number of current uses (controlled by L{Use} and L{Release}).
+
+ """
+ return self._use_count
+
+
+_RESOURCE_CONVERTER = {
+ "instances": _QaInstance.FromDict,
+ "nodes": _QaNode.FromDict,
+ }
+
+
+def _ConvertResources((key, value)):
+ """Converts cluster resources in configuration to Python objects.
"""
- global cfg # pylint: disable=W0603
+ fn = _RESOURCE_CONVERTER.get(key, None)
+ if fn:
+ return (key, map(fn, value))
+ else:
+ return (key, value)
+
+
+class _QaConfig(object):
+ def __init__(self, data):
+ """Initializes instances of this class.
+
+ """
+ self._data = data
+
+ #: Cluster-wide run-time value of the exclusive storage flag
+ self._exclusive_storage = None
+
+ @classmethod
+ def Load(cls, filename):
+ """Loads a configuration file and produces a configuration object.
+
+ @type filename: string
+ @param filename: Path to configuration file
+ @rtype: L{_QaConfig}
+
+ """
+ data = serializer.LoadJson(utils.ReadFile(filename))
+
+ result = cls(dict(map(_ConvertResources,
+ data.items()))) # pylint: disable=E1103
+ result.Validate()
+
+ return result
+
+ def Validate(self):
+ """Validates loaded configuration data.
- cfg = serializer.LoadJson(utils.ReadFile(path))
+ """
+ if not self.get("nodes"):
+ raise qa_error.Error("Need at least one node")
+
+ if not self.get("instances"):
+ raise qa_error.Error("Need at least one instance")
+
+ if (self.get("disk") is None or
+ self.get("disk-growth") is None or
+ len(self.get("disk")) != len(self.get("disk-growth"))):
+ raise qa_error.Error("Config options 'disk' and 'disk-growth' must exist"
+ " and have the same number of items")
+
+ check = self.GetInstanceCheckScript()
+ if check:
+ try:
+ os.stat(check)
+ except EnvironmentError, err:
+ raise qa_error.Error("Can't find instance check script '%s': %s" %
+ (check, err))
+
+ enabled_hv = frozenset(self.GetEnabledHypervisors())
+ if not enabled_hv:
+ raise qa_error.Error("No hypervisor is enabled")
+
+ difference = enabled_hv - constants.HYPER_TYPES
+ if difference:
+ raise qa_error.Error("Unknown hypervisor(s) enabled: %s" %
+ utils.CommaJoin(difference))
+
+ def __getitem__(self, name):
+ """Returns configuration value.
+
+ @type name: string
+ @param name: Name of configuration entry
+
+ """
+ return self._data[name]
+
+ def get(self, name, default=None):
+ """Returns configuration value.
+
+ @type name: string
+ @param name: Name of configuration entry
+ @param default: Default value
+
+ """
+ return self._data.get(name, default)
+
+ def GetMasterNode(self):
+ """Returns the default master node for the cluster.
+
+ """
+ return self["nodes"][0]
+
+ def GetInstanceCheckScript(self):
+ """Returns path to instance check script or C{None}.
- Validate()
+ """
+ return self._data.get(_INSTANCE_CHECK_KEY, None)
+ def GetEnabledHypervisors(self):
+ """Returns list of enabled hypervisors.
-def Validate():
- if len(cfg["nodes"]) < 1:
- raise qa_error.Error("Need at least one node")
- if len(cfg["instances"]) < 1:
- raise qa_error.Error("Need at least one instance")
- if len(cfg["disk"]) != len(cfg["disk-growth"]):
- raise qa_error.Error("Config options 'disk' and 'disk-growth' must have"
- " the same number of items")
+ @rtype: list
- check = GetInstanceCheckScript()
- if check:
+ """
try:
- os.stat(check)
- except EnvironmentError, err:
- raise qa_error.Error("Can't find instance check script '%s': %s" %
- (check, err))
+ value = self._data[_ENABLED_HV_KEY]
+ except KeyError:
+ return [constants.DEFAULT_ENABLED_HYPERVISOR]
+ else:
+ if value is None:
+ return []
+ elif isinstance(value, basestring):
+ # The configuration key ("enabled-hypervisors") implies there can be
+ # multiple values. Multiple hypervisors are comma-separated on the
+ # command line option to "gnt-cluster init", so we need to handle them
+ # equally here.
+ return value.split(",")
+ else:
+ return value
+
+ def GetDefaultHypervisor(self):
+ """Returns the default hypervisor to be used.
+
+ """
+ return self.GetEnabledHypervisors()[0]
+
+ def SetExclusiveStorage(self, value):
+ """Set the expected value of the C{exclusive_storage} flag for the cluster.
+
+ """
+ self._exclusive_storage = bool(value)
+
+ def GetExclusiveStorage(self):
+ """Get the expected value of the C{exclusive_storage} flag for the cluster.
+
+ """
+ value = self._exclusive_storage
+ assert value is not None
+ return value
+
+ def IsTemplateSupported(self, templ):
+ """Is the given disk template supported by the current configuration?
+
+ """
+ return (not self.GetExclusiveStorage() or
+ templ in constants.DTS_EXCL_STORAGE)
+
+
+def Load(path):
+ """Loads the passed configuration file.
+
+ """
+ global _config # pylint: disable=W0603
- enabled_hv = frozenset(GetEnabledHypervisors())
- if not enabled_hv:
- raise qa_error.Error("No hypervisor is enabled")
+ _config = _QaConfig.Load(path)
- difference = enabled_hv - constants.HYPER_TYPES
- if difference:
- raise qa_error.Error("Unknown hypervisor(s) enabled: %s" %
- utils.CommaJoin(difference))
+
+def GetConfig():
+ """Returns the configuration object.
+
+ """
+ if _config is None:
+ raise RuntimeError("Configuration not yet loaded")
+
+ return _config
def get(name, default=None):
- return cfg.get(name, default)
+ """Wrapper for L{_QaConfig.get}.
+
+ """
+ return GetConfig().get(name, default=default)
class Either:
"""
if _cfg is None:
- _cfg = cfg
+ cfg = GetConfig()
+ else:
+ cfg = _cfg
# Get settings for all tests
- cfg_tests = _cfg.get("tests", {})
+ cfg_tests = cfg.get("tests", {})
# Get default setting
default = cfg_tests.get("default", True)
tests, compat.all)
-def GetInstanceCheckScript():
- """Returns path to instance check script or C{None}.
+def GetInstanceCheckScript(*args):
+ """Wrapper for L{_QaConfig.GetInstanceCheckScript}.
"""
- return cfg.get(_INSTANCE_CHECK_KEY, None)
-
+ return GetConfig().GetInstanceCheckScript(*args)
-def GetEnabledHypervisors():
- """Returns list of enabled hypervisors.
- @rtype: list
+def GetEnabledHypervisors(*args):
+ """Wrapper for L{_QaConfig.GetEnabledHypervisors}.
"""
- try:
- value = cfg[_ENABLED_HV_KEY]
- except KeyError:
- return [constants.DEFAULT_ENABLED_HYPERVISOR]
- else:
- if isinstance(value, basestring):
- # The configuration key ("enabled-hypervisors") implies there can be
- # multiple values. Multiple hypervisors are comma-separated on the
- # command line option to "gnt-cluster init", so we need to handle them
- # equally here.
- return value.split(",")
- else:
- return value
+ return GetConfig().GetEnabledHypervisors(*args)
-def GetDefaultHypervisor():
- """Returns the default hypervisor to be used.
+def GetDefaultHypervisor(*args):
+ """Wrapper for L{_QaConfig.GetDefaultHypervisor}.
"""
- return GetEnabledHypervisors()[0]
+ return GetConfig().GetDefaultHypervisor(*args)
-def GetInstanceNicMac(inst, default=None):
- """Returns MAC address for instance's network interface.
+def GetMasterNode():
+ """Wrapper for L{_QaConfig.GetMasterNode}.
"""
- return inst.get("nic.mac/0", default)
-
-
-def GetMasterNode():
- return cfg["nodes"][0]
+ return GetConfig().GetMasterNode()
-def AcquireInstance():
+def AcquireInstance(_cfg=None):
"""Returns an instance which isn't in use.
"""
+ if _cfg is None:
+ cfg = GetConfig()
+ else:
+ cfg = _cfg
+
# Filter out unwanted instances
- tmp_flt = lambda inst: not inst.get("_used", False)
- instances = filter(tmp_flt, cfg["instances"])
- del tmp_flt
+ instances = filter(lambda inst: not inst.used, cfg["instances"])
- if len(instances) == 0:
+ if not instances:
raise qa_error.OutOfInstancesError("No instances left")
inst = instances[0]
- inst["_used"] = True
- inst["_template"] = None
- return inst
-
-
-def ReleaseInstance(inst):
- inst["_used"] = False
-
-
-def GetInstanceTemplate(inst):
- """Return the disk template of an instance.
-
- """
- templ = inst["_template"]
- assert templ is not None
- return templ
+ assert not inst.used
+ assert inst.disk_template is None
-def SetInstanceTemplate(inst, template):
- """Set the disk template for an instance.
+ inst.used = True
- """
- inst["_template"] = template
+ return inst
def SetExclusiveStorage(value):
- """Set the expected value of the exclusive_storage flag for the cluster.
+ """Wrapper for L{_QaConfig.SetExclusiveStorage}.
"""
- cfg[_EXCLUSIVE_STORAGE_KEY] = bool(value)
+ return GetConfig().SetExclusiveStorage(value)
def GetExclusiveStorage():
- """Get the expected value of the exclusive_storage flag for the cluster.
+ """Wrapper for L{_QaConfig.GetExclusiveStorage}.
"""
- val = cfg.get(_EXCLUSIVE_STORAGE_KEY)
- assert val is not None
- return val
+ return GetConfig().GetExclusiveStorage()
def IsTemplateSupported(templ):
- """Is the given templated supported by the current configuration?
+  """Wrapper for L{_QaConfig.IsTemplateSupported}.
"""
- if GetExclusiveStorage():
- return templ in constants.DTS_EXCL_STORAGE
- else:
- return True
+ return GetConfig().IsTemplateSupported(templ)
-def AcquireNode(exclude=None):
+def AcquireNode(exclude=None, _cfg=None):
"""Returns the least used node.
"""
- master = GetMasterNode()
+ if _cfg is None:
+ cfg = GetConfig()
+ else:
+ cfg = _cfg
+
+ master = cfg.GetMasterNode()
# Filter out unwanted nodes
# TODO: Maybe combine filters
else:
nodes = filter(lambda node: node != exclude, cfg["nodes"])
- tmp_flt = lambda node: node.get("_added", False) or node == master
- nodes = filter(tmp_flt, nodes)
- del tmp_flt
+ nodes = filter(lambda node: node.added or node == master, nodes)
- if len(nodes) == 0:
+ if not nodes:
raise qa_error.OutOfNodesError("No nodes left")
# Get node with least number of uses
+ # TODO: Switch to computing sort key instead of comparing directly
def compare(a, b):
- result = cmp(a.get("_count", 0), b.get("_count", 0))
+ result = cmp(a.use_count, b.use_count)
if result == 0:
- result = cmp(a["primary"], b["primary"])
+ result = cmp(a.primary, b.primary)
return result
nodes.sort(cmp=compare)
- node = nodes[0]
- node["_count"] = node.get("_count", 0) + 1
- return node
+ return nodes[0].Use()
def AcquireManyNodes(num, exclude=None):
return nodes
-def ReleaseNode(node):
- node["_count"] = node.get("_count", 0) - 1
-
-
def ReleaseManyNodes(nodes):
- for n in nodes:
- ReleaseNode(n)
+ for node in nodes:
+ node.Release()
cmd = (utils.ShellQuoteArgs(["gnt-instance", "list", "-o", "status", name]) +
' | grep running')
- ret = StartSSH(master["primary"], cmd).wait()
+ ret = StartSSH(master.primary, cmd).wait()
return ret == 0
AssertCommand(["gnt-cluster", "watcher", "pause", "4h"])
cmd = ["gnt-cluster", "watcher", "info"]
- output = GetCommandOutput(master["primary"],
+ output = GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd))
AssertMatch(output, r"^.*\bis paused\b.*")
AssertCommand(["gnt-cluster", "watcher", "continue"])
cmd = ["gnt-cluster", "watcher", "info"]
- output = GetCommandOutput(master["primary"],
+ output = GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd))
AssertMatch(output, r"^.*\bis not paused\b.*")
"""Test automatic restart of instance by ganeti-watcher.
"""
- inst_name = qa_utils.ResolveInstanceName(instance["name"])
+ inst_name = qa_utils.ResolveInstanceName(instance.name)
_ResetWatcherDaemon()
_ShutdownInstance(inst_name)
"""Test five consecutive instance failures.
"""
- inst_name = qa_utils.ResolveInstanceName(instance["name"])
+ inst_name = qa_utils.ResolveInstanceName(instance.name)
_ResetWatcherDaemon()
pricmd = [pingprimary, "-e"]
seccmd = [pingsecondary, "-e"]
for i in nodes:
- pricmd.append(i["primary"])
- if "secondary" in i:
- seccmd.append(i["secondary"])
+ pricmd.append(i.primary)
+ if i.secondary:
+ seccmd.append(i.secondary)
pristr = utils.ShellQuoteArgs(pricmd)
if seccmd:
(other_group, ) = qa_utils.GetNonexistentGroups(1)
- master_node = qa_config.GetMasterNode()["primary"]
+ master_node = qa_config.GetMasterNode().primary
def AssertInGroup(group, nodes):
real_output = GetCommandOutput(master_node,
if force_mac:
nic0_mac = force_mac
else:
- nic0_mac = qa_config.GetInstanceNicMac(inst)
+ nic0_mac = inst.GetNicMacAddr(0, None)
+
if nic0_mac:
params.extend(["--net", "0:mac=%s" % nic0_mac])
"--disk-template=%s" % disk_template,
"--node=%s" % node] +
_GetGenericAddParameters(instance))
- cmd.append(instance["name"])
+ cmd.append(instance.name)
AssertCommand(cmd)
- _CheckSsconfInstanceList(instance["name"])
- qa_config.SetInstanceTemplate(instance, disk_template)
+ _CheckSsconfInstanceList(instance.name)
+ instance.SetDiskTemplate(disk_template)
return instance
except:
- qa_config.ReleaseInstance(instance)
+ instance.Release()
raise
"""
master = qa_config.GetMasterNode()
infocmd = utils.ShellQuoteArgs(["gnt-instance", "info", instance])
- info_out = qa_utils.GetCommandOutput(master["primary"], infocmd)
+ info_out = qa_utils.GetCommandOutput(master.primary, infocmd)
re_node = re.compile(r"^\s+-\s+(?:primary|secondaries):\s+(\S.+)$")
node_elem = r"([^,()]+)(?:\s+\([^)]+\))?"
# re_nodelist matches a list of nodes returned by gnt-instance info, e.g.:
@param instance: the instance
"""
- info = _GetInstanceInfo(instance["name"])
+ info = _GetInstanceInfo(instance.name)
vols = info["volumes"]
for node in info["nodes"]:
AssertCommand(["lvremove", "-f"] + vols, node=node)
master = qa_config.GetMasterNode()
infocmd = utils.ShellQuoteArgs(["gnt-instance", "list", "--no-headers",
"-o", field, instance])
- info_out = qa_utils.GetCommandOutput(master["primary"], infocmd).strip()
+ info_out = qa_utils.GetCommandOutput(master.primary, infocmd).strip()
if info_out == "Y":
return True
elif info_out == "N":
def IsFailoverSupported(instance):
- templ = qa_config.GetInstanceTemplate(instance)
- return templ in constants.DTS_MIRRORED
+ return instance.disk_template in constants.DTS_MIRRORED
def IsMigrationSupported(instance):
- templ = qa_config.GetInstanceTemplate(instance)
- return templ in constants.DTS_MIRRORED
+ return instance.disk_template in constants.DTS_MIRRORED
def IsDiskReplacingSupported(instance):
- templ = qa_config.GetInstanceTemplate(instance)
- return templ == constants.DT_DRBD8
+ return instance.disk_template == constants.DT_DRBD8
@InstanceCheck(None, INST_UP, RETURN_VALUE)
def TestInstanceAddWithPlainDisk(nodes):
"""gnt-instance add -t plain"""
assert len(nodes) == 1
- return _DiskTest(nodes[0]["primary"], "plain")
+ return _DiskTest(nodes[0].primary, constants.DT_PLAIN)
@InstanceCheck(None, INST_UP, RETURN_VALUE)
def TestInstanceAddWithDrbdDisk(nodes):
"""gnt-instance add -t drbd"""
assert len(nodes) == 2
- return _DiskTest(":".join(map(operator.itemgetter("primary"), nodes)),
- "drbd")
+ return _DiskTest(":".join(map(operator.attrgetter("primary"), nodes)),
+ constants.DT_DRBD8)
@InstanceCheck(None, INST_DOWN, FIRST_ARG)
def TestInstanceRemove(instance):
"""gnt-instance remove"""
- AssertCommand(["gnt-instance", "remove", "-f", instance["name"]])
-
- qa_config.ReleaseInstance(instance)
+ AssertCommand(["gnt-instance", "remove", "-f", instance.name])
@InstanceCheck(INST_DOWN, INST_UP, FIRST_ARG)
def TestInstanceStartup(instance):
"""gnt-instance startup"""
- AssertCommand(["gnt-instance", "startup", instance["name"]])
+ AssertCommand(["gnt-instance", "startup", instance.name])
@InstanceCheck(INST_UP, INST_DOWN, FIRST_ARG)
def TestInstanceShutdown(instance):
"""gnt-instance shutdown"""
- AssertCommand(["gnt-instance", "shutdown", instance["name"]])
+ AssertCommand(["gnt-instance", "shutdown", instance.name])
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
"""gnt-instance reboot"""
options = qa_config.get("options", {})
reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)
- name = instance["name"]
+ name = instance.name
for rtype in reboot_types:
AssertCommand(["gnt-instance", "reboot", "--type=%s" % rtype, name])
master = qa_config.GetMasterNode()
cmd = ["gnt-instance", "list", "--no-headers", "-o", "status", name]
- result_output = qa_utils.GetCommandOutput(master["primary"],
+ result_output = qa_utils.GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd))
AssertEqual(result_output.strip(), constants.INSTST_RUNNING)
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
def TestInstanceReinstall(instance):
"""gnt-instance reinstall"""
- AssertCommand(["gnt-instance", "reinstall", "-f", instance["name"]])
+ AssertCommand(["gnt-instance", "reinstall", "-f", instance.name])
# Test with non-existant OS definition
AssertCommand(["gnt-instance", "reinstall", "-f",
"--os-type=NonExistantOsForQa",
- instance["name"]],
+ instance.name],
fail=True)
cmd = ["cat", utils.PathJoin(pathutils.DATA_DIR,
"ssconf_%s" % constants.SS_INSTANCE_LIST)]
- return qa_utils.GetCommandOutput(master["primary"],
+ return qa_utils.GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd)).splitlines()
" test")
return
- cmd = ["gnt-instance", "failover", "--force", instance["name"]]
+ cmd = ["gnt-instance", "failover", "--force", instance.name]
# failover ...
AssertCommand(cmd)
" test")
return
- cmd = ["gnt-instance", "migrate", "--force", instance["name"]]
+ cmd = ["gnt-instance", "migrate", "--force", instance.name]
af_par = constants.BE_ALWAYS_FAILOVER
af_field = "be/" + constants.BE_ALWAYS_FAILOVER
- af_init_val = _GetBoolInstanceField(instance["name"], af_field)
+ af_init_val = _GetBoolInstanceField(instance.name, af_field)
# migrate ...
AssertCommand(cmd)
if toggle_always_failover:
AssertCommand(["gnt-instance", "modify", "-B",
("%s=%s" % (af_par, not af_init_val)),
- instance["name"]])
+ instance.name])
AssertCommand(cmd)
# TODO: Verify the choice between failover and migration
qa_utils.RunInstanceCheck(instance, True)
if toggle_always_failover:
AssertCommand(["gnt-instance", "modify", "-B",
- ("%s=%s" % (af_par, af_init_val)), instance["name"]])
+ ("%s=%s" % (af_par, af_init_val)), instance.name])
# TODO: Split into multiple tests
- AssertCommand(["gnt-instance", "shutdown", instance["name"]])
+ AssertCommand(["gnt-instance", "shutdown", instance.name])
qa_utils.RunInstanceCheck(instance, False)
AssertCommand(cmd, fail=True)
AssertCommand(["gnt-instance", "migrate", "--force", "--allow-failover",
- instance["name"]])
- AssertCommand(["gnt-instance", "start", instance["name"]])
+ instance.name])
+ AssertCommand(["gnt-instance", "start", instance.name])
AssertCommand(cmd)
# @InstanceCheck enforces the check that the instance is running
qa_utils.RunInstanceCheck(instance, True)
AssertCommand(["gnt-instance", "modify", "-B",
("%s=%s" %
(constants.BE_ALWAYS_FAILOVER, constants.VALUE_TRUE)),
- instance["name"]])
+ instance.name])
AssertCommand(cmd)
qa_utils.RunInstanceCheck(instance, True)
AssertCommand(["gnt-instance", "modify", "-B",
("%s=%s" %
(constants.BE_ALWAYS_FAILOVER, constants.VALUE_FALSE)),
- instance["name"]])
+ instance.name])
AssertCommand(cmd)
qa_utils.RunInstanceCheck(instance, True)
def TestInstanceInfo(instance):
"""gnt-instance info"""
- AssertCommand(["gnt-instance", "info", instance["name"]])
+ AssertCommand(["gnt-instance", "info", instance.name])
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
])
for alist in args:
- AssertCommand(["gnt-instance", "modify"] + alist + [instance["name"]])
+ AssertCommand(["gnt-instance", "modify"] + alist + [instance.name])
# check no-modify
- AssertCommand(["gnt-instance", "modify", instance["name"]], fail=True)
+ AssertCommand(["gnt-instance", "modify", instance.name], fail=True)
# Marking offline while instance is running must fail...
- AssertCommand(["gnt-instance", "modify", "--offline", instance["name"]],
+ AssertCommand(["gnt-instance", "modify", "--offline", instance.name],
fail=True)
# ...while making it online is ok, and should work
- AssertCommand(["gnt-instance", "modify", "--online", instance["name"]])
+ AssertCommand(["gnt-instance", "modify", "--online", instance.name])
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
def TestInstanceStoppedModify(instance):
"""gnt-instance modify (stopped instance)"""
- name = instance["name"]
+ name = instance.name
# Instance was not marked offline; try marking it online once more
AssertCommand(["gnt-instance", "modify", "--online", name])
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
def TestInstanceConvertDiskToPlain(instance, inodes):
"""gnt-instance modify -t"""
- name = instance["name"]
- template = qa_config.GetInstanceTemplate(instance)
- if template != "drbd":
+ name = instance.name
+
+ template = instance.disk_template
+ if template != constants.DT_DRBD8:
print qa_utils.FormatInfo("Unsupported template %s, skipping conversion"
" test" % template)
return
+
assert len(inodes) == 2
- AssertCommand(["gnt-instance", "modify", "-t", "plain", name])
- AssertCommand(["gnt-instance", "modify", "-t", "drbd",
- "-n", inodes[1]["primary"], name])
+ AssertCommand(["gnt-instance", "modify", "-t", constants.DT_PLAIN, name])
+ AssertCommand(["gnt-instance", "modify", "-t", constants.DT_DRBD8,
+ "-n", inodes[1].primary, name])
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
if qa_config.GetExclusiveStorage():
print qa_utils.FormatInfo("Test not supported with exclusive_storage")
return
- name = instance["name"]
+ name = instance.name
all_size = qa_config.get("disk")
all_grow = qa_config.get("disk-growth")
if not all_grow:
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestInstanceConsole(instance):
"""gnt-instance console"""
- AssertCommand(["gnt-instance", "console", "--show-cmd", instance["name"]])
+ AssertCommand(["gnt-instance", "console", "--show-cmd", instance.name])
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def buildcmd(args):
cmd = ["gnt-instance", "replace-disks"]
cmd.extend(args)
- cmd.append(instance["name"])
+ cmd.append(instance.name)
return cmd
if not IsDiskReplacingSupported(instance):
# A placeholder; the actual command choice depends on use_ialloc
None,
# Restore the original secondary
- ["--new-secondary=%s" % snode["primary"]],
+ ["--new-secondary=%s" % snode.primary],
]:
if data is None:
if use_ialloc:
data = ["-I", constants.DEFAULT_IALLOCATOR_SHORTCUT]
else:
- data = ["--new-secondary=%s" % othernode["primary"]]
+ data = ["--new-secondary=%s" % othernode.primary]
AssertCommand(buildcmd(data))
AssertCommand(buildcmd(["-a"]))
- AssertCommand(["gnt-instance", "stop", instance["name"]])
+ AssertCommand(["gnt-instance", "stop", instance.name])
AssertCommand(buildcmd(["-a"]), fail=True)
- AssertCommand(["gnt-instance", "activate-disks", instance["name"]])
+ AssertCommand(["gnt-instance", "activate-disks", instance.name])
AssertCommand(["gnt-instance", "activate-disks", "--wait-for-sync",
- instance["name"]])
+ instance.name])
AssertCommand(buildcmd(["-a"]))
- AssertCommand(["gnt-instance", "start", instance["name"]])
+ AssertCommand(["gnt-instance", "start", instance.name])
def _AssertRecreateDisks(cmdargs, instance, fail=False, check=True,
if destroy:
_DestroyInstanceVolumes(instance)
AssertCommand((["gnt-instance", "recreate-disks"] + cmdargs +
- [instance["name"]]), fail)
+ [instance.name]), fail)
if not fail and check:
# Quick check that the disks are there
- AssertCommand(["gnt-instance", "activate-disks", instance["name"]])
+ AssertCommand(["gnt-instance", "activate-disks", instance.name])
AssertCommand(["gnt-instance", "activate-disks", "--wait-for-sync",
- instance["name"]])
- AssertCommand(["gnt-instance", "deactivate-disks", instance["name"]])
+ instance.name])
+ AssertCommand(["gnt-instance", "deactivate-disks", instance.name])
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
"""
options = qa_config.get("options", {})
use_ialloc = options.get("use-iallocators", True)
- other_seq = ":".join([n["primary"] for n in othernodes])
- orig_seq = ":".join([n["primary"] for n in inodes])
+ other_seq = ":".join([n.primary for n in othernodes])
+ orig_seq = ":".join([n.primary for n in inodes])
# These fail because the instance is running
_AssertRecreateDisks(["-n", other_seq], instance, fail=True, destroy=False)
if use_ialloc:
_AssertRecreateDisks(["-I", "hail"], instance, fail=True, destroy=False)
else:
_AssertRecreateDisks(["-n", other_seq], instance, fail=True, destroy=False)
- AssertCommand(["gnt-instance", "stop", instance["name"]])
+ AssertCommand(["gnt-instance", "stop", instance.name])
# Disks exist: this should fail
_AssertRecreateDisks([], instance, fail=True, destroy=False)
# Recreate disks in place
# Move disks back
_AssertRecreateDisks(["-n", orig_seq], instance, check=False)
# This and InstanceCheck decoration check that the disks are working
- AssertCommand(["gnt-instance", "reinstall", "-f", instance["name"]])
- AssertCommand(["gnt-instance", "start", instance["name"]])
+ AssertCommand(["gnt-instance", "reinstall", "-f", instance.name])
+ AssertCommand(["gnt-instance", "start", instance.name])
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestInstanceExport(instance, node):
"""gnt-backup export -n ..."""
- name = instance["name"]
- AssertCommand(["gnt-backup", "export", "-n", node["primary"], name])
+ name = instance.name
+ AssertCommand(["gnt-backup", "export", "-n", node.primary, name])
return qa_utils.ResolveInstanceName(name)
@InstanceCheck(None, INST_DOWN, FIRST_ARG)
def TestInstanceExportWithRemove(instance, node):
"""gnt-backup export --remove-instance"""
- AssertCommand(["gnt-backup", "export", "-n", node["primary"],
- "--remove-instance", instance["name"]])
- qa_config.ReleaseInstance(instance)
+ AssertCommand(["gnt-backup", "export", "-n", node.primary,
+ "--remove-instance", instance.name])
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestInstanceExportNoTarget(instance):
"""gnt-backup export (without target node, should fail)"""
- AssertCommand(["gnt-backup", "export", instance["name"]], fail=True)
+ AssertCommand(["gnt-backup", "export", instance.name], fail=True)
@InstanceCheck(None, INST_DOWN, FIRST_ARG)
cmd = (["gnt-backup", "import",
"--disk-template=%s" % templ,
"--no-ip-check",
- "--src-node=%s" % expnode["primary"],
+ "--src-node=%s" % expnode.primary,
"--src-dir=%s/%s" % (pathutils.EXPORT_DIR, name),
- "--node=%s" % node["primary"]] +
+ "--node=%s" % node.primary] +
_GetGenericAddParameters(newinst, force_mac=constants.VALUE_GENERATE))
- cmd.append(newinst["name"])
+ cmd.append(newinst.name)
AssertCommand(cmd)
- qa_config.SetInstanceTemplate(newinst, templ)
+ newinst.SetDiskTemplate(templ)
def TestBackupList(expnode):
"""gnt-backup list"""
- AssertCommand(["gnt-backup", "list", "--node=%s" % expnode["primary"]])
+ AssertCommand(["gnt-backup", "list", "--node=%s" % expnode.primary])
qa_utils.GenericQueryTest("gnt-backup", query.EXPORT_FIELDS.keys(),
namefield=None, test_unknown=False)
@param set_online: function to call to set the node on-line
"""
- info = _GetInstanceInfo(instance["name"])
+ info = _GetInstanceInfo(instance.name)
set_offline(snode)
try:
TestInstanceRemove(instance)
finally:
set_online(snode)
# Clean up the disks on the offline node
- for minor in info["drbd-minors"][snode["primary"]]:
+ for minor in info["drbd-minors"][snode.primary]:
AssertCommand(["drbdsetup", str(minor), "down"], node=snode)
AssertCommand(["lvremove", "-f"] + info["volumes"], node=snode)
--- /dev/null
+#
+#
+
+# Copyright (C) 2013 Google Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+
+
+"""QA tests for networks.
+
+"""
+
+import qa_config
+import qa_utils
+
+from qa_utils import AssertCommand
+
+
+def GetNonexistentNetworks(count):
+ """Gets network names which shouldn't exist on the cluster.
+
+ @param count: Number of networks to get
+ @rtype: integer
+
+ """
+ return qa_utils.GetNonexistentEntityNames(count, "networks", "network")
+
+
+def TestNetworkAddRemove():
+ """gnt-network add/remove"""
+ (network1, network2) = GetNonexistentNetworks(2)
+
+ # Add some networks of different sizes.
+ # Note: Using RFC5737 addresses.
+ AssertCommand(["gnt-network", "add", "--network", "192.0.2.0/30", network1])
+ AssertCommand(["gnt-network", "add", "--network", "198.51.100.0/24",
+ network2])
+ # Try to add a network with an existing name.
+  AssertCommand(["gnt-network", "add", "--network", "203.0.113.0/24", network2],
+ fail=True)
+
+ AssertCommand(["gnt-network", "remove", network1])
+ AssertCommand(["gnt-network", "remove", network2])
+
+
+def TestNetworkConnect():
+ """gnt-network connect/disconnect"""
+ (group1, ) = qa_utils.GetNonexistentGroups(1)
+ (network1, ) = GetNonexistentNetworks(1)
+
+ default_mode = "bridged"
+ default_link = "xen-br0"
+ nicparams = qa_config.get("default-nicparams")
+ if nicparams:
+ mode = nicparams.get("mode", default_mode)
+ link = nicparams.get("link", default_link)
+ else:
+ mode = default_mode
+ link = default_link
+
+ AssertCommand(["gnt-group", "add", group1])
+ AssertCommand(["gnt-network", "add", "--network", "192.0.2.0/24", network1])
+
+ AssertCommand(["gnt-network", "connect", network1, mode, link, group1])
+ AssertCommand(["gnt-network", "disconnect", network1, group1])
+
+ AssertCommand(["gnt-group", "remove", group1])
+ AssertCommand(["gnt-network", "remove", network1])
def _NodeAdd(node, readd=False):
- if not readd and node.get("_added", False):
- raise qa_error.Error("Node %s already in cluster" % node["primary"])
- elif readd and not node.get("_added", False):
- raise qa_error.Error("Node %s not yet in cluster" % node["primary"])
+ if not readd and node.added:
+ raise qa_error.Error("Node %s already in cluster" % node.primary)
+ elif readd and not node.added:
+ raise qa_error.Error("Node %s not yet in cluster" % node.primary)
cmd = ["gnt-node", "add", "--no-ssh-key-check"]
- if node.get("secondary", None):
- cmd.append("--secondary-ip=%s" % node["secondary"])
+ if node.secondary:
+ cmd.append("--secondary-ip=%s" % node.secondary)
if readd:
cmd.append("--readd")
- cmd.append(node["primary"])
+ cmd.append(node.primary)
AssertCommand(cmd)
- node["_added"] = True
+ if readd:
+ assert node.added
+ else:
+ node.MarkAdded()
def _NodeRemove(node):
- AssertCommand(["gnt-node", "remove", node["primary"]])
- node["_added"] = False
+ AssertCommand(["gnt-node", "remove", node.primary])
+ node.MarkRemoved()
def MakeNodeOffline(node, value):
"""gnt-node modify --offline=value"""
# value in ["yes", "no"]
- AssertCommand(["gnt-node", "modify", "--offline", value, node["primary"]])
+ AssertCommand(["gnt-node", "modify", "--offline", value, node.primary])
def TestNodeAddAll():
master = qa_config.GetMasterNode()
for node in qa_config.get("nodes"):
if node != master:
- node["_added"] = True
+ node.MarkAdded()
def TestNodeRemoveAll():
cmd = ["gnt-node", "list-storage", "--storage-type", storage_type,
"--output=node,name,allocatable", "--separator=|",
"--no-headers"]
- output = qa_utils.GetCommandOutput(master["primary"],
+ output = qa_utils.GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd))
# Test with up to two devices
cmd = ["gnt-node", "list-storage", "--storage-type", storage_type,
"--output=name,allocatable", "--separator=|",
"--no-headers", node_name]
- listout = qa_utils.GetCommandOutput(master["primary"],
+ listout = qa_utils.GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd))
for line in listout.splitlines():
(vfy_name, vfy_allocatable) = line.split("|")
" it to have no primary instances.")
# Fail over to secondary node
- AssertCommand(["gnt-node", "failover", "-f", node["primary"]])
+ AssertCommand(["gnt-node", "failover", "-f", node.primary])
# ... and back again.
- AssertCommand(["gnt-node", "failover", "-f", node2["primary"]])
+ AssertCommand(["gnt-node", "failover", "-f", node2.primary])
def TestNodeEvacuate(node, node2):
# Evacuate all secondary instances
AssertCommand(["gnt-node", "evacuate", "-f",
- "--new-secondary=%s" % node3["primary"], node2["primary"]])
+ "--new-secondary=%s" % node3.primary, node2.primary])
# ... and back again.
AssertCommand(["gnt-node", "evacuate", "-f",
- "--new-secondary=%s" % node2["primary"], node3["primary"]])
+ "--new-secondary=%s" % node2.primary, node3.primary])
finally:
- qa_config.ReleaseNode(node3)
+ node3.Release()
def TestNodeModify(node):
for flag in ["master-candidate", "drained", "offline"]:
for value in ["yes", "no"]:
AssertCommand(["gnt-node", "modify", "--force",
- "--%s=%s" % (flag, value), node["primary"]])
+ "--%s=%s" % (flag, value), node.primary])
AssertCommand(["gnt-node", "modify", "--master-candidate=yes",
- "--auto-promote", node["primary"]])
+ "--auto-promote", node.primary])
# Test setting secondary IP address
- AssertCommand(["gnt-node", "modify", "--secondary-ip=%s" % node["secondary"],
- node["primary"]])
+ AssertCommand(["gnt-node", "modify", "--secondary-ip=%s" % node.secondary,
+ node.primary])
def _CreateOobScriptStructure():
"""Create a simple OOB handling script and its structure."""
master = qa_config.GetMasterNode()
- data_path = qa_utils.UploadData(master["primary"], "")
- verify_path = qa_utils.UploadData(master["primary"], "")
- exit_code_path = qa_utils.UploadData(master["primary"], "")
+ data_path = qa_utils.UploadData(master.primary, "")
+ verify_path = qa_utils.UploadData(master.primary, "")
+ exit_code_path = qa_utils.UploadData(master.primary, "")
oob_script = (("#!/bin/bash\n"
"echo \"$@\" > %s\n"
"exit $(< %s)\n") %
(utils.ShellQuote(verify_path), utils.ShellQuote(data_path),
utils.ShellQuote(exit_code_path)))
- oob_path = qa_utils.UploadData(master["primary"], oob_script, mode=0700)
+ oob_path = qa_utils.UploadData(master.primary, oob_script, mode=0700)
return [oob_path, verify_path, data_path, exit_code_path]
def _UpdateOobFile(path, data):
"""Updates the data file with data."""
master = qa_config.GetMasterNode()
- qa_utils.UploadData(master["primary"], data, filename=path)
+ qa_utils.UploadData(master.primary, data, filename=path)
def _AssertOobCall(verify_path, expected_args):
master = qa_config.GetMasterNode()
verify_output_cmd = utils.ShellQuoteArgs(["cat", verify_path])
- output = qa_utils.GetCommandOutput(master["primary"], verify_output_cmd,
+ output = qa_utils.GetCommandOutput(master.primary, verify_output_cmd,
tty=False)
AssertEqual(expected_args, output.strip())
node = qa_config.AcquireNode(exclude=master)
- master_name = master["primary"]
- node_name = node["primary"]
+ master_name = master.primary
+ node_name = node.primary
full_node_name = qa_utils.ResolveNodeName(node)
(oob_path, verify_path,
AssertCommand(["gnt-node", "health"], fail=True)
# Different OOB script for node
- verify_path2 = qa_utils.UploadData(master["primary"], "")
+ verify_path2 = qa_utils.UploadData(master.primary, "")
oob_script = ("#!/bin/sh\n"
"echo \"$@\" > %s\n") % verify_path2
- oob_path2 = qa_utils.UploadData(master["primary"], oob_script, mode=0700)
+ oob_path2 = qa_utils.UploadData(master.primary, oob_script, mode=0700)
try:
AssertCommand(["gnt-node", "modify", "--node-parameters",
def TestNodeListDrbd(node):
"""gnt-node list-drbd"""
- AssertCommand(["gnt-node", "list-drbd", node["primary"]])
+ AssertCommand(["gnt-node", "list-drbd", node.primary])
def _BuildSetESCmd(action, value, node_name):
"""
for action in ["add", "modify"]:
for value in (True, False, "default"):
- AssertCommand(_BuildSetESCmd(action, value, node["primary"]), fail=True)
+ AssertCommand(_BuildSetESCmd(action, value, node.primary), fail=True)
cmd = " && ".join(parts)
print qa_utils.FormatInfo("Setting up %s with %s OS definition" %
- (node["primary"],
+ (node.primary,
["an invalid", "a valid"][int(valid)]))
AssertCommand(cmd, node=node)
AssertCommand(["gnt-os", "diagnose"], fail=(mode != _ALL_VALID))
# Diagnose again, ignoring exit status
- output = qa_utils.GetCommandOutput(master["primary"],
+ output = qa_utils.GetCommandOutput(master.primary,
"gnt-os diagnose || :")
for line in output.splitlines():
if line.startswith("OS: %s [global status:" % name):
# Check info for all
cmd = ["gnt-os", "info"]
- output = qa_utils.GetCommandOutput(master["primary"],
+ output = qa_utils.GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd))
AssertIn("%s:" % name, output.splitlines())
# Check info for OS
cmd = ["gnt-os", "info", name]
- output = qa_utils.GetCommandOutput(master["primary"],
+ output = qa_utils.GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd)).splitlines()
AssertIn("%s:" % name, output)
for (field, value) in [("valid", mode == _ALL_VALID),
# Only valid OSes should be listed
cmd = ["gnt-os", "list", "--no-headers"]
- output = qa_utils.GetCommandOutput(master["primary"],
+ output = qa_utils.GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd))
if mode == _ALL_VALID and not (hidden or blacklisted):
assert_fn = AssertIn
# Write to temporary file
_rapi_ca = tempfile.NamedTemporaryFile()
- _rapi_ca.write(qa_utils.GetCommandOutput(master["primary"],
+ _rapi_ca.write(qa_utils.GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd)))
_rapi_ca.flush()
cfg_curl = rapi.client.GenericCurlConfig(cafile=_rapi_ca.name,
proxy="")
- _rapi_client = rapi.client.GanetiRapiClient(master["primary"], port=port,
+ _rapi_client = rapi.client.GanetiRapiClient(master.primary, port=port,
username=username,
password=password,
curl_config_fn=cfg_curl)
_VerifyInstance(instance_data)
_DoTests([
- ("/2/instances/%s" % instance["name"], _VerifyInstance, "GET", None),
+ ("/2/instances/%s" % instance.name, _VerifyInstance, "GET", None),
("/2/instances", _VerifyInstancesList, "GET", None),
("/2/instances?bulk=1", _VerifyInstancesBulk, "GET", None),
- ("/2/instances/%s/activate-disks" % instance["name"],
+ ("/2/instances/%s/activate-disks" % instance.name,
_VerifyReturnsJob, "PUT", None),
- ("/2/instances/%s/deactivate-disks" % instance["name"],
+ ("/2/instances/%s/deactivate-disks" % instance.name,
_VerifyReturnsJob, "PUT", None),
])
# Test OpBackupPrepare
(job_id, ) = _DoTests([
("/2/instances/%s/prepare-export?mode=%s" %
- (instance["name"], constants.EXPORT_MODE_REMOTE),
+ (instance.name, constants.EXPORT_MODE_REMOTE),
_VerifyReturnsJob, "PUT", None),
])
_VerifyNode(node_data)
_DoTests([
- ("/2/nodes/%s" % node["primary"], _VerifyNode, "GET", None),
+ ("/2/nodes/%s" % node.primary, _VerifyNode, "GET", None),
("/2/nodes", _VerifyNodesList, "GET", None),
("/2/nodes?bulk=1", _VerifyNodesBulk, "GET", None),
])
def TestRapiInstanceAdd(node, use_client):
"""Test adding a new instance via RAPI"""
instance = qa_config.AcquireInstance()
- qa_config.SetInstanceTemplate(instance, constants.DT_PLAIN)
+ instance.SetDiskTemplate(constants.DT_PLAIN)
try:
disk_sizes = [utils.ParseUnit(size) for size in qa_config.get("disk")]
disks = [{"size": size} for size in disk_sizes]
- nic0_mac = qa_config.GetInstanceNicMac(instance,
- default=constants.VALUE_GENERATE)
+ nic0_mac = instance.GetNicMacAddr(0, constants.VALUE_GENERATE)
nics = [{
constants.INIC_MAC: nic0_mac,
}]
if use_client:
job_id = _rapi_client.CreateInstance(constants.INSTANCE_CREATE,
- instance["name"],
+ instance.name,
constants.DT_PLAIN,
disks, nics,
os=qa_config.get("os"),
- pnode=node["primary"],
+ pnode=node.primary,
beparams=beparams)
else:
body = {
"__version__": 1,
"mode": constants.INSTANCE_CREATE,
- "name": instance["name"],
+ "name": instance.name,
"os_type": qa_config.get("os"),
"disk_template": constants.DT_PLAIN,
- "pnode": node["primary"],
+ "pnode": node.primary,
"beparams": beparams,
"disks": disks,
"nics": nics,
return instance
except:
- qa_config.ReleaseInstance(instance)
+ instance.Release()
raise
def TestRapiInstanceRemove(instance, use_client):
"""Test removing instance via RAPI"""
if use_client:
- job_id = _rapi_client.DeleteInstance(instance["name"])
+ job_id = _rapi_client.DeleteInstance(instance.name)
else:
(job_id, ) = _DoTests([
- ("/2/instances/%s" % instance["name"], _VerifyReturnsJob, "DELETE", None),
+ ("/2/instances/%s" % instance.name, _VerifyReturnsJob, "DELETE", None),
])
_WaitForRapiJob(job_id)
- qa_config.ReleaseInstance(instance)
-
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestRapiInstanceMigrate(instance):
" test")
return
# Move to secondary node
- _WaitForRapiJob(_rapi_client.MigrateInstance(instance["name"]))
+ _WaitForRapiJob(_rapi_client.MigrateInstance(instance.name))
qa_utils.RunInstanceCheck(instance, True)
# And back to previous primary
- _WaitForRapiJob(_rapi_client.MigrateInstance(instance["name"]))
+ _WaitForRapiJob(_rapi_client.MigrateInstance(instance.name))
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
" test")
return
# Move to secondary node
- _WaitForRapiJob(_rapi_client.FailoverInstance(instance["name"]))
+ _WaitForRapiJob(_rapi_client.FailoverInstance(instance.name))
qa_utils.RunInstanceCheck(instance, True)
# And back to previous primary
- _WaitForRapiJob(_rapi_client.FailoverInstance(instance["name"]))
+ _WaitForRapiJob(_rapi_client.FailoverInstance(instance.name))
@InstanceCheck(INST_UP, INST_DOWN, FIRST_ARG)
def TestRapiInstanceShutdown(instance):
"""Test stopping an instance via RAPI"""
- _WaitForRapiJob(_rapi_client.ShutdownInstance(instance["name"]))
+ _WaitForRapiJob(_rapi_client.ShutdownInstance(instance.name))
@InstanceCheck(INST_DOWN, INST_UP, FIRST_ARG)
def TestRapiInstanceStartup(instance):
"""Test starting an instance via RAPI"""
- _WaitForRapiJob(_rapi_client.StartupInstance(instance["name"]))
+ _WaitForRapiJob(_rapi_client.StartupInstance(instance.name))
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
def TestRapiInstanceReinstall(instance):
"""Test reinstalling an instance via RAPI"""
- _WaitForRapiJob(_rapi_client.ReinstallInstance(instance["name"]))
+ _WaitForRapiJob(_rapi_client.ReinstallInstance(instance.name))
# By default, the instance is started again
qa_utils.RunInstanceCheck(instance, True)
# Reinstall again without starting
- _WaitForRapiJob(_rapi_client.ReinstallInstance(instance["name"],
+ _WaitForRapiJob(_rapi_client.ReinstallInstance(instance.name,
no_startup=True))
" skipping test")
return
fn = _rapi_client.ReplaceInstanceDisks
- _WaitForRapiJob(fn(instance["name"],
+ _WaitForRapiJob(fn(instance.name,
mode=constants.REPLACE_DISK_AUTO, disks=[]))
- _WaitForRapiJob(fn(instance["name"],
+ _WaitForRapiJob(fn(instance.name,
mode=constants.REPLACE_DISK_SEC, disks="0"))
default_hv = qa_config.GetDefaultHypervisor()
def _ModifyInstance(**kwargs):
- _WaitForRapiJob(_rapi_client.ModifyInstance(instance["name"], **kwargs))
+ _WaitForRapiJob(_rapi_client.ModifyInstance(instance.name, **kwargs))
_ModifyInstance(beparams={
constants.BE_VCPUS: 3,
@InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
def TestRapiInstanceConsole(instance):
"""Test getting instance console information via RAPI"""
- result = _rapi_client.GetInstanceConsole(instance["name"])
+ result = _rapi_client.GetInstanceConsole(instance.name)
console = objects.InstanceConsole.FromDict(result)
AssertEqual(console.Validate(), True)
- AssertEqual(console.instance, qa_utils.ResolveInstanceName(instance["name"]))
+ AssertEqual(console.instance, qa_utils.ResolveInstanceName(instance.name))
@InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
def TestRapiStoppedInstanceConsole(instance):
"""Test getting stopped instance's console information via RAPI"""
try:
- _rapi_client.GetInstanceConsole(instance["name"])
+ _rapi_client.GetInstanceConsole(instance.name)
except rapi.client.GanetiApiError, err:
AssertEqual(err.code, 503)
else:
rapi_pw_file.write(_rapi_password)
rapi_pw_file.flush()
- qa_config.SetInstanceTemplate(dest_instance,
- qa_config.GetInstanceTemplate(src_instance))
+ dest_instance.SetDiskTemplate(src_instance.disk_template)
# TODO: Run some instance tests before moving back
pnode = inodes[0]
# note: pnode:snode are the *current* nodes, so we move it first to
# tnode:pnode, then back to pnode:snode
- for si, di, pn, sn in [(src_instance["name"], dest_instance["name"],
- tnode["primary"], pnode["primary"]),
- (dest_instance["name"], src_instance["name"],
- pnode["primary"], snode["primary"])]:
+ for si, di, pn, sn in [(src_instance.name, dest_instance.name,
+ tnode.primary, pnode.primary),
+ (dest_instance.name, src_instance.name,
+ pnode.primary, snode.primary)]:
cmd = [
"../tools/move-instance",
"--verbose",
"--dest-primary-node=%s" % pn,
"--dest-secondary-node=%s" % sn,
"--net=0:mac=%s" % constants.VALUE_GENERATE,
- master["primary"],
- master["primary"],
+ master.primary,
+ master.primary,
si,
]
def TestNodeTags(node):
"""gnt-node tags"""
- _TestTags(constants.TAG_NODE, node["primary"])
+ _TestTags(constants.TAG_NODE, node.primary)
def TestGroupTags(group):
def TestInstanceTags(instance):
"""gnt-instance tags"""
- _TestTags(constants.TAG_INSTANCE, instance["name"])
+ _TestTags(constants.TAG_INSTANCE, instance.name)
import subprocess
import random
import tempfile
+import operator
try:
import functools
raise qa_error.Error("%r doesn't match /%r/" % (string, pattern))
-def _GetName(entity, key):
+def _GetName(entity, fn):
  """Tries to get name of an entity.
-  @type entity: string or dict
-  @type key: string
-  @param key: Dictionary key containing name
+  @type entity: string or object
+  @param fn: Function retrieving name from entity object
  """
  if isinstance(entity, basestring):
    result = entity
-  elif isinstance(entity, dict):
-    result = entity[key]
  else:
-    raise qa_error.Error("Expected string or dictionary, got %s: %s" %
-                         (type(entity), entity))
+    result = fn(entity)
  if not ht.TNonEmptyString(result):
    raise Exception("Invalid name '%s'" % result)
if node is None:
node = qa_config.GetMasterNode()
- nodename = _GetName(node, "primary")
+ nodename = _GetName(node, operator.attrgetter("primary"))
if isinstance(cmd, basestring):
cmdstr = cmd
"""
master = qa_config.GetMasterNode()
- output = GetCommandOutput(master["primary"], utils.ShellQuoteArgs(cmd))
+ output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd))
for line in output.splitlines():
(lkey, lvalue) = line.split(":", 1)
if lkey == key:
"""Gets the full name of a node.
"""
- return _ResolveName(["gnt-node", "info", node["primary"]],
+ return _ResolveName(["gnt-node", "info", node.primary],
"Node name")
# Get list of all instances
cmd = ["gnt-instance", "list", "--separator=:", "--no-headers",
"--output=name,pnode,snodes"]
- output = GetCommandOutput(master["primary"], utils.ShellQuoteArgs(cmd))
+ output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd))
instances = []
for line in output.splitlines():
if names:
cmd.extend(names)
- return GetCommandOutput(master["primary"],
+ return GetCommandOutput(master.primary,
utils.ShellQuoteArgs(cmd)).splitlines()
# Check listed fields (all, must be sorted)
realcmd = [cmd, "list-fields", "--separator=|", "--no-headers"]
- output = GetCommandOutput(master["primary"],
+ output = GetCommandOutput(master.primary,
utils.ShellQuoteArgs(realcmd)).splitlines()
AssertEqual([line.split("|", 1)[0] for line in output],
utils.NiceSort(fields))
"""
master = qa_config.GetMasterNode()
- tmp_hosts = UploadData(master["primary"], "", mode=0644)
+ tmp_hosts = UploadData(master.primary, "", mode=0644)
data = []
for localhost in ("::1", "127.0.0.1"):
"""
master = qa_config.GetMasterNode()
- tmp_hosts = UploadData(master["primary"], "", mode=0644)
+ tmp_hosts = UploadData(master.primary, "", mode=0644)
quoted_tmp_hosts = utils.ShellQuote(tmp_hosts)
sed_data = " ".join(hostnames)
"""Check if instance is running or not.
"""
- instance_name = _GetName(instance, "name")
+ instance_name = _GetName(instance, operator.attrgetter("name"))
script = qa_config.GetInstanceCheckScript()
if not script:
master_node = qa_config.GetMasterNode()
# Build command to connect to master node
- master_ssh = GetSSHCommand(master_node["primary"], "--")
+ master_ssh = GetSSHCommand(master_node.primary, "--")
if running:
running_shellval = "1"
"""Gets group names which shouldn't exist on the cluster.
@param count: Number of groups to get
- @rtype: list
+ @rtype: integer
"""
- groups = qa_config.get("groups", {})
+ return GetNonexistentEntityNames(count, "groups", "group")
- default = ["group1", "group2", "group3"]
+
+def GetNonexistentEntityNames(count, name_config, name_prefix):
+ """Gets entity names which shouldn't exist on the cluster.
+
+ The actualy names can refer to arbitrary entities (for example
+ groups, networks).
+
+ @param count: Number of names to get
+ @rtype: integer
+ @param name_config: name of the leaf in the config containing
+ this entity's configuration, including a 'inexistent-'
+ element
+ @rtype: string
+ @param name_prefix: prefix of the entity's names, used to compose
+ the default values; for example for groups, the prefix is
+ 'group' and the generated names are then group1, group2, ...
+ @rtype: string
+
+ """
+ entities = qa_config.get(name_config, {})
+
+ default = [name_prefix + str(i) for i in range(count)]
assert count <= len(default)
- candidates = groups.get("inexistent-groups", default)[:count]
+ name_config_inexistent = "inexistent-" + name_config
+ candidates = entities.get(name_config_inexistent, default)[:count]
if len(candidates) < count:
- raise Exception("At least %s non-existent groups are needed" % count)
+ raise Exception("At least %s non-existent %s are needed" %
+ (count, name_config))
return candidates
argComplToText (ArgCompletion optc min_cnt max_cnt) =
complToText optc ++ " " ++ show min_cnt ++ " " ++ maybe "none" show max_cnt
--- | Abrreviation for the option type.
+-- | Abbreviation for the option type.
type GenericOptType a = (OptDescr (a -> Result a), OptCompletion)
-- | Type class for options which support help and version.
, getGroupOfNode
, getInstPrimaryNode
, getInstMinorsForNode
+ , getNetwork
, buildLinkIpInstnameMap
, instNodes
) where
ginsts = map (getNodeInstances cfg) gnodes in
(concatMap fst ginsts, concatMap snd ginsts)
+-- | Looks up a network. If looking up by uuid fails, we look up
+-- by name.
+getNetwork :: ConfigData -> String -> ErrorResult Network
+getNetwork cfg name =
+ let networks = fromContainer (configNetworks cfg)
+ in case getItem "Network" name networks of
+ Ok net -> Ok net
+ Bad _ -> let by_name = M.mapKeys
+ (fromNonEmpty . networkName . (M.!) networks)
+ networks
+ in getItem "Network" name by_name
+
-- | Looks up an instance's primary node.
getInstPrimaryNode :: ConfigData -> String -> ErrorResult Node
getInstPrimaryNode cfg name =
, oGroup
, oIAllocSrc
, oInstMoves
+ , oJobDelay
, genOLuxiSocket
, oLuxiSocket
, oMachineReadable
, optIAllocSrc :: Maybe FilePath -- ^ The iallocation spec
, optSelInst :: [String] -- ^ Instances to be excluded
, optLuxi :: Maybe FilePath -- ^ Collect data from Luxi
+ , optJobDelay :: Double -- ^ Delay before executing first job
, optMachineReadable :: Bool -- ^ Output machine-readable format
, optMaster :: String -- ^ Collect data from RAPI
, optMaxLength :: Int -- ^ Stop after this many steps
, optIAllocSrc = Nothing
, optSelInst = []
, optLuxi = Nothing
+ , optJobDelay = 10
, optMachineReadable = False
, optMaster = ""
, optMaxLength = -1
"Specify an iallocator spec as the cluster data source",
OptComplFile)
+oJobDelay :: OptType
+oJobDelay =
+ (Option "" ["job-delay"]
+ (reqWithConversion (tryRead "job delay")
+ (\d opts -> Ok opts { optJobDelay = d }) "SECONDS")
+ "insert this much delay before the execution of repair jobs\
+ \ to allow the tool to continue processing instances",
+ OptComplFloat)
+
genOLuxiSocket :: String -> OptType
genOLuxiSocket defSocket =
(Option "L" ["luxi"]
--- /dev/null
+{-# LANGUAGE TupleSections #-}
+
+{-| Auto-repair tool for Ganeti.
+
+-}
+
+{-
+
+Copyright (C) 2013 Google Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+
+-}
+
+module Ganeti.HTools.Program.Harep
+ ( main
+ , arguments
+ , options) where
+
+import Control.Exception (bracket)
+import Control.Monad
+import Data.Function
+import Data.List
+import Data.Maybe
+import Data.Ord
+import System.Time
+import qualified Data.Map as Map
+
+import Ganeti.BasicTypes
+import Ganeti.Common
+import Ganeti.Errors
+import Ganeti.Jobs
+import Ganeti.OpCodes
+import Ganeti.OpParams
+import Ganeti.Types
+import Ganeti.Utils
+import qualified Ganeti.Constants as C
+import qualified Ganeti.Luxi as L
+import qualified Ganeti.Path as Path
+
+import Ganeti.HTools.CLI
+import Ganeti.HTools.Loader
+import Ganeti.HTools.ExtLoader
+import Ganeti.HTools.Types
+import qualified Ganeti.HTools.Container as Container
+import qualified Ganeti.HTools.Instance as Instance
+import qualified Ganeti.HTools.Node as Node
+
+-- | The command-line options understood by harep: the Luxi socket to
+-- contact and the delay to insert before the first repair job.
+options :: IO [OptType]
+options = do
+  luxi <- oLuxiSocket
+  return [luxi, oJobDelay]
+
+-- | Harep accepts no positional command-line arguments.
+arguments :: [ArgCompletion]
+arguments = []
+
+-- | The repair-tracking state of one instance: the instance itself, its
+-- current auto-repair status, and the auto-repair tags that have become
+-- obsolete and are scheduled for removal.
+data InstanceData = InstanceData
+  { arInstance :: Instance.Instance  -- ^ The instance under consideration
+  , arState :: AutoRepairStatus      -- ^ Current auto-repair status
+  , tagsToRemove :: [String]         -- ^ Obsolete tags to delete
+  }
+  deriving (Eq, Show)
+
+-- | Parse a tag into an 'AutoRepairData' record.
+--
+-- @Nothing@ is returned if the tag is not an auto-repair tag, or if it's
+-- malformed.
+parseInitTag :: String -> Maybe AutoRepairData
+parseInitTag tag =
+ let parsePending = do
+ subtag <- chompPrefix C.autoRepairTagPending tag
+ case sepSplit ':' subtag of
+ [rtype, uuid, ts, jobs] -> makeArData rtype uuid ts jobs
+ _ -> fail ("Invalid tag: " ++ show tag)
+
+ parseResult = do
+ subtag <- chompPrefix C.autoRepairTagResult tag
+ case sepSplit ':' subtag of
+ [rtype, uuid, ts, result, jobs] -> do
+ arData <- makeArData rtype uuid ts jobs
+ result' <- autoRepairResultFromRaw result
+ return arData { arResult = Just result' }
+ _ -> fail ("Invalid tag: " ++ show tag)
+
+ makeArData rtype uuid ts jobs = do
+ rtype' <- autoRepairTypeFromRaw rtype
+ ts' <- tryRead "auto-repair time" ts
+ jobs' <- mapM makeJobIdS $ sepSplit '+' jobs
+ return AutoRepairData { arType = rtype'
+ , arUuid = uuid
+ , arTime = TOD ts' 0
+ , arJobs = jobs'
+ , arResult = Nothing
+ , arTag = tag
+ }
+ in
+ parsePending `mplus` parseResult
+
+-- | Extract the 'AutoRepairData' payload, if any, of an
+-- 'AutoRepairStatus' value.  Only a healthy status without repair
+-- history (@ArHealthy Nothing@) carries no data.
+getArData :: AutoRepairStatus -> Maybe AutoRepairData
+getArData (ArHealthy md) = md
+getArData (ArFailedRepair d) = Just d
+getArData (ArPendingRepair d) = Just d
+getArData (ArNeedsRepair d) = Just d
+
+-- | Return a short name for each auto-repair status.
+--
+-- This is a more concise representation of the status, because the default
+-- "Show" formatting includes all the accompanying auto-repair data.
+arStateName :: AutoRepairStatus -> String
+arStateName (ArHealthy _) = "Healthy"
+arStateName (ArFailedRepair _) = "Failure"
+arStateName (ArPendingRepair _) = "Pending repair"
+arStateName (ArNeedsRepair _) = "Needs repair"
+
+-- | Return a new list of tags to remove that includes @arTag@ if present.
+delCurTag :: InstanceData -> [String]
+delCurTag instData =
+  case getArData (arState instData) of
+    Just d -> arTag d : tagsToRemove instData
+    Nothing -> tagsToRemove instData
+
+-- | Set the initial auto-repair state of an instance from its auto-repair tags.
+--
+-- The rules when there are multiple tags is:
+--
+-- * the earliest failure result always wins
+--
+-- * two or more pending repairs results in a fatal error
+--
+-- * a pending result from id X and a success result from id Y result in error
+-- if Y is newer than X
+--
+-- * if there are no pending repairs, the newest success result wins,
+-- otherwise the pending result is used.
+setInitialState :: Instance.Instance -> Result InstanceData
+setInitialState inst =
+ let arData = mapMaybe parseInitTag $ Instance.allTags inst
+ -- Group all the AutoRepairData records by id (i.e. by repair task), and
+ -- present them from oldest to newest.
+ arData' = sortBy (comparing arUuid) arData
+ arGroups = groupBy ((==) `on` arUuid) arData'
+ arGroups' = sortBy (comparing $ minimum . map arTime) arGroups
+ in
+ foldM arStatusCmp (InstanceData inst (ArHealthy Nothing) []) arGroups'
+
+-- | Update the initial status of an instance with new repair task tags.
+--
+-- This function gets called once per repair group in an instance's tag, and it
+-- determines whether to set the status of the instance according to this new
+-- group, or to keep the existing state. See the documentation for
+-- 'setInitialState' for the rules to be followed when determining this.
+arStatusCmp :: InstanceData -> [AutoRepairData] -> Result InstanceData
+arStatusCmp instData arData =
+  let curSt = arState instData
+      -- NOTE(review): 'last' picks the maximum of (arResult, arTime); as
+      -- Nothing < Just _, a group containing any finished result reports
+      -- that result rather than its pending entry -- confirm this matches
+      -- the intended per-group precedence.
+      arData' = sortBy (comparing keyfn) arData
+      keyfn d = (arResult d, arTime d)
+      newData = last arData'
+      newSt = case arResult newData of
+                Just ArSuccess -> ArHealthy $ Just newData
+                Just ArEnoperm -> ArHealthy $ Just newData
+                Just ArFailure -> ArFailedRepair newData
+                Nothing -> ArPendingRepair newData
+  in
+   case curSt of
+     ArFailedRepair _ -> Ok instData -- Always keep the earliest failure.
+     ArHealthy _ -> Ok instData { arState = newSt
+                                , tagsToRemove = delCurTag instData
+                                }
+     ArPendingRepair d -> Bad (
+       "An unfinished repair was found in instance " ++
+       Instance.name (arInstance instData) ++ ": found tag " ++
+       show (arTag newData) ++ ", but older pending tag " ++
+       show (arTag d) ++ " exists.")
+
+     ArNeedsRepair _ -> Bad
+       "programming error: ArNeedsRepair found as an initial state"
+
+-- | Query jobs of a pending repair, returning the new instance data.
+processPending :: L.Client -> InstanceData -> IO InstanceData
+processPending client instData =
+ case arState instData of
+ (ArPendingRepair arData) -> do
+ sts <- L.queryJobsStatus client $ arJobs arData
+ time <- getClockTime
+ case sts of
+ Bad e -> exitErr $ "could not check job status: " ++ formatError e
+ Ok sts' ->
+ if any (<= JOB_STATUS_RUNNING) sts' then
+ return instData -- (no change)
+ else do
+ let iname = Instance.name $ arInstance instData
+ srcSt = arStateName $ arState instData
+ destSt = arStateName arState'
+ putStrLn ("Moving " ++ iname ++ " from " ++ show srcSt ++ " to " ++
+ show destSt)
+ commitChange client instData'
+ where
+ instData' =
+ instData { arState = arState'
+ , tagsToRemove = delCurTag instData
+ }
+ arState' =
+ if all (== JOB_STATUS_SUCCESS) sts' then
+ ArHealthy $ Just (updateTag $ arData { arResult = Just ArSuccess
+ , arTime = time })
+ else
+ ArFailedRepair (updateTag $ arData { arResult = Just ArFailure
+ , arTime = time })
+
+ _ -> return instData
+
+-- | Recompute the 'arTag' field of an 'AutoRepairData' record so that it
+-- matches all the other fields.
+--
+-- Pending repairs use the pending prefix and carry type, uuid, timestamp
+-- and the job list; finished repairs use the result prefix and insert the
+-- raw result between the timestamp and the job list.
+updateTag :: AutoRepairData -> AutoRepairData
+updateTag arData =
+  let jobList = intercalate "+" . map (show . fromJobId) $ arJobs arData
+      common = [ autoRepairTypeToRaw (arType arData)
+               , arUuid arData
+               , clockTimeToString (arTime arData)
+               ]
+      (prefix, resultPart) =
+        case arResult arData of
+          Nothing -> (C.autoRepairTagPending, [])
+          Just rs -> (C.autoRepairTagResult, [autoRepairResultToRaw rs])
+  in arData { arTag = prefix ++
+                      intercalate ":" (common ++ resultPart ++ [jobList]) }
+
+-- | Apply and remove tags from an instance as indicated by 'InstanceData'.
+--
+-- If the /arState/ of the /InstanceData/ record has an associated
+-- 'AutoRepairData', add its tag to the instance object. Additionally, if
+-- /tagsToRemove/ is not empty, remove those tags from the instance object. The
+-- returned /InstanceData/ object always has an empty /tagsToRemove/.
+commitChange :: L.Client -> InstanceData -> IO InstanceData
+commitChange client instData = do
+ let iname = Instance.name $ arInstance instData
+ arData = getArData $ arState instData
+ rmTags = tagsToRemove instData
+ execJobsWaitOk' opcodes = do
+ res <- execJobsWaitOk [map wrapOpCode opcodes] client
+ case res of
+ Ok _ -> return ()
+ Bad e -> exitErr e
+
+ when (isJust arData) $ do
+ let tag = arTag $ fromJust arData
+ putStrLn (">>> Adding the following tag to " ++ iname ++ ":\n" ++ show tag)
+ execJobsWaitOk' [OpTagsSet (TagInstance iname) [tag]]
+
+ unless (null rmTags) $ do
+ putStr (">>> Removing the following tags from " ++ iname ++ ":\n" ++
+ unlines (map show rmTags))
+ execJobsWaitOk' [OpTagsDel (TagInstance iname) rmTags]
+
+ return instData { tagsToRemove = [] }
+
+-- | Detect brokenness with an instance and suggest repair type and jobs to run.
+--
+-- Only the DRBD8 and plain disk templates are handled; for any other
+-- template 'Nothing' is returned. For DRBD8: both nodes offline means
+-- the instance must be reinstalled from scratch; only the primary
+-- offline means a failover; only the secondary offline means its
+-- storage is re-created on a new secondary. For plain disks, an
+-- offline primary also requires a reinstall. A healthy instance
+-- yields 'Nothing'.
+detectBroken :: Node.List -> Instance.Instance
+ -> Maybe (AutoRepairType, [OpCode])
+detectBroken nl inst =
+ let disk = Instance.diskTemplate inst
+ iname = Instance.name inst
+ offPri = Node.offline $ Container.find (Instance.pNode inst) nl
+ offSec = Node.offline $ Container.find (Instance.sNode inst) nl
+ in
+ case disk of
+ DTDrbd8
+ | offPri && offSec ->
+ Just (
+ ArReinstall,
+ [ OpInstanceRecreateDisks { opInstanceName = iname
+ , opRecreateDisksInfo = RecreateDisksAll
+ , opNodes = []
+ -- FIXME: there should be a better way to
+ -- specify opcode parameters than abusing
+ -- mkNonEmpty in this way (using the fact
+ -- that Maybe is used both for optional
+ -- fields, and to express failure).
+ , opIallocator = mkNonEmpty "hail"
+ }
+ , OpInstanceReinstall { opInstanceName = iname
+ , opOsType = Nothing
+ , opTempOsParams = Nothing
+ , opForceVariant = False
+ }
+ ])
+ | offPri ->
+ Just (
+ ArFailover,
+ [ OpInstanceFailover { opInstanceName = iname
+ -- FIXME: ditto, see above.
+ , opShutdownTimeout = fromJust $ mkNonNegative
+ C.defaultShutdownTimeout
+ , opIgnoreConsistency = False
+ , opTargetNode = Nothing
+ , opIgnoreIpolicy = False
+ , opIallocator = Nothing
+ }
+ ])
+ | offSec ->
+ Just (
+ ArFixStorage,
+ [ OpInstanceReplaceDisks { opInstanceName = iname
+ , opReplaceDisksMode = ReplaceNewSecondary
+ , opReplaceDisksList = []
+ , opRemoteNode = Nothing
+ -- FIXME: ditto, see above.
+ , opIallocator = mkNonEmpty "hail"
+ , opEarlyRelease = False
+ , opIgnoreIpolicy = False
+ }
+ ])
+ | otherwise -> Nothing
+
+ DTPlain
+ | offPri ->
+ Just (
+ ArReinstall,
+ [ OpInstanceRecreateDisks { opInstanceName = iname
+ , opRecreateDisksInfo = RecreateDisksAll
+ , opNodes = []
+ -- FIXME: ditto, see above.
+ , opIallocator = mkNonEmpty "hail"
+ }
+ , OpInstanceReinstall { opInstanceName = iname
+ , opOsType = Nothing
+ , opTempOsParams = Nothing
+ , opForceVariant = False
+ }
+ ])
+ | otherwise -> Nothing
+
+ _ -> Nothing -- Other cases are unimplemented for now: DTDiskless,
+ -- DTFile, DTSharedFile, DTBlock, DTRbd, DTExt.
+
+-- | Perform the suggested repair on an instance if its policy allows it.
+--
+-- Under an 'ArEnabled' policy the repair runs only if its type does not
+-- exceed the policy's maximum; otherwise the instance is merely tagged
+-- with an 'ArEnoperm' result. A submitted repair moves the instance to
+-- the 'ArPendingRepair' state, with the repair job IDs recorded in its
+-- tag. Instances whose policy is not 'ArEnabled' are left untouched.
+doRepair :: L.Client -- ^ The Luxi client
+ -> Double -- ^ Delay to insert before the first repair opcode
+ -> InstanceData -- ^ The instance data
+ -> (AutoRepairType, [OpCode]) -- ^ The repair job to perform
+ -> IO InstanceData -- ^ The updated instance data
+doRepair client delay instData (rtype, opcodes) =
+ let inst = arInstance instData
+ ipol = Instance.arPolicy inst
+ iname = Instance.name inst
+ in
+ case ipol of
+ ArEnabled maxtype ->
+ if rtype > maxtype then do
+ uuid <- newUUID
+ time <- getClockTime
+
+ let arState' = ArNeedsRepair (
+ updateTag $ AutoRepairData rtype uuid time [] (Just ArEnoperm) "")
+ instData' = instData { arState = arState'
+ , tagsToRemove = delCurTag instData
+ }
+
+ putStrLn ("Not performing a repair of type " ++ show rtype ++ " on " ++
+ iname ++ " because only repairs up to " ++ show maxtype ++
+ " are allowed")
+ commitChange client instData' -- Adds "enoperm" result label.
+ else do
+ putStrLn ("Executing " ++ show rtype ++ " repair on " ++ iname)
+
+ -- After submitting the job, we must write an autorepair:pending tag,
+ -- that includes the repair job IDs so that they can be checked later.
+ -- One problem we run into is that the repair job immediately grabs
+ -- locks for the affected instance, and the subsequent TAGS_SET job is
+ -- blocked, introducing an unnecessary delay for the end-user. One
+ -- alternative would be not to wait for the completion of the TAGS_SET
+ -- job, contrary to what commitChange normally does; but we insist on
+ -- waiting for the tag to be set so as to abort in case of failure,
+ -- because the cluster is left in an invalid state in that case.
+ --
+ -- The proper solution (in 2.9+) would be not to use tags for storing
+ -- autorepair data, or make the TAGS_SET opcode not grab an instance's
+ -- locks (if that's deemed safe). In the meantime, we introduce an
+ -- artificial delay in the repair job (via a TestDelay opcode) so that
+ -- once we have the job ID, the TAGS_SET job can complete before the
+ -- repair job actually grabs the locks. (Please note that this is not
+ -- about synchronization, but merely about speeding up the execution of
+ -- the harep tool. If this TestDelay opcode is removed, the program is
+ -- still correct.)
+ let opcodes' =
+ if delay > 0 then
+ OpTestDelay { opDelayDuration = delay
+ , opDelayOnMaster = True
+ , opDelayOnNodes = []
+ , opDelayRepeat = fromJust $ mkNonNegative 0
+ } : opcodes
+ else
+ opcodes
+
+ uuid <- newUUID
+ time <- getClockTime
+ jids <- submitJobs [map wrapOpCode opcodes'] client
+
+ case jids of
+ Bad e -> exitErr e
+ Ok jids' ->
+ let arState' = ArPendingRepair (
+ updateTag $ AutoRepairData rtype uuid time jids' Nothing "")
+ instData' = instData { arState = arState'
+ , tagsToRemove = delCurTag instData
+ }
+ in
+ commitChange client instData' -- Adds "pending" label.
+
+ otherSt -> do
+ putStrLn ("Not repairing " ++ iname ++ " because it's in state " ++
+ show otherSt)
+ return instData
+
+-- | Main function.
+--
+-- Proceeds in three steps: parse the auto-repair tags of every
+-- instance, check the status of any pending repair jobs, and submit
+-- repair jobs for broken instances that are still in 'ArHealthy'
+-- state. Finally, a per-state instance count is printed.
+main :: Options -> [String] -> IO ()
+main opts args = do
+ unless (null args) $
+ exitErr "this program doesn't take any arguments."
+
+ luxiDef <- Path.defaultLuxiSocket
+ let master = fromMaybe luxiDef $ optLuxi opts
+ opts' = opts { optLuxi = Just master }
+
+ (ClusterData _ nl il _ _) <- loadExternalData opts'
+
+ let iniDataRes = mapM setInitialState $ Container.elems il
+ iniData <- exitIfBad "when parsing auto-repair tags" iniDataRes
+
+ -- First step: check all pending repairs, see if they are completed.
+ iniData' <- bracket (L.getClient master) L.closeClient $
+ forM iniData . processPending
+
+ -- Second step: detect any problems.
+ let repairs = map (detectBroken nl . arInstance) iniData'
+
+ -- Third step: create repair jobs for broken instances that are in ArHealthy.
+ let maybeRepair c (i, r) = maybe (return i) (repairHealthy c i) r
+ jobDelay = optJobDelay opts
+ repairHealthy c i = case arState i of
+ ArHealthy _ -> doRepair c jobDelay i
+ _ -> const (return i)
+
+ repairDone <- bracket (L.getClient master) L.closeClient $
+ forM (zip iniData' repairs) . maybeRepair
+
+ -- Print some stats and exit.
+ let states = map ((, 1 :: Int) . arStateName . arState) repairDone
+ counts = Map.fromListWith (+) states
+
+ putStrLn "---------------------"
+ putStrLn "Instance status count"
+ putStrLn "---------------------"
+ putStr . unlines . Map.elems $
+ Map.mapWithKey (\k v -> k ++ ": " ++ show v) counts
import Ganeti.Common (formatCommands, PersonalityList)
import Ganeti.HTools.CLI (Options, parseOpts, genericOpts)
import qualified Ganeti.HTools.Program.Hail as Hail
+import qualified Ganeti.HTools.Program.Harep as Harep
import qualified Ganeti.HTools.Program.Hbal as Hbal
import qualified Ganeti.HTools.Program.Hcheck as Hcheck
import qualified Ganeti.HTools.Program.Hscan as Hscan
"Ganeti IAllocator plugin that implements the instance\
\ placement and movement using the same algorithm as\
\ hbal(1)"))
+ , ("harep", (Harep.main, Harep.options, Harep.arguments,
+ "auto-repair tool that detects certain kind of problems\
+ \ with instances and applies the allowed set of solutions"))
, ("hbal", (Hbal.main, Hbal.options, Hbal.arguments,
"cluster balancer that looks at the current state of\
\ the cluster and computes a series of steps designed\
-- | The possible auto-repair results.
$(THH.declareSADT "AutoRepairResult"
- [ ("ArSuccess", 'C.autoRepairSuccess)
+ -- Order is important here: higher results take precedence when an object
+ -- has several result annotations attached.
+ [ ("ArEnoperm", 'C.autoRepairEnoperm)
+ , ("ArSuccess", 'C.autoRepairSuccess)
, ("ArFailure", 'C.autoRepairFailure)
- , ("ArEnoperm", 'C.autoRepairEnoperm)
])
-- | The possible auto-repair policy for a given instance.
-- | The possible auto-repair states for any given instance.
data AutoRepairStatus
- = ArHealthy -- ^ No problems detected with the instance
+ = ArHealthy (Maybe AutoRepairData) -- ^ No problems detected with the instance
| ArNeedsRepair AutoRepairData -- ^ Instance has problems, no action taken
| ArPendingRepair AutoRepairData -- ^ Repair jobs ongoing for the instance
| ArFailedRepair AutoRepairData -- ^ Some repair jobs for the instance failed
+ deriving (Eq, Show)
-- | The data accompanying a repair operation (future, pending, or failed).
data AutoRepairData = AutoRepairData { arType :: AutoRepairType
, arTime :: ClockTime
, arJobs :: [JobId]
, arResult :: Maybe AutoRepairResult
+ , arTag :: String
}
+ deriving (Eq, Show)
-}
module Ganeti.Jobs
- ( execJobsWait
+ ( submitJobs
+ , execJobsWait
+ , execJobsWaitOk
, waitForJobs
) where
import Control.Concurrent (threadDelay)
+import Data.List
import Ganeti.BasicTypes
import Ganeti.Errors
import Ganeti.OpCodes
import Ganeti.Types
+-- | Submits a set of jobs and returns their job IDs without waiting for
+-- completion.
+--
+-- This is a thin wrapper over 'L.submitManyJobs' that converts a Luxi
+-- error into a textual 'Result' failure.
+submitJobs :: [[MetaOpCode]] -> L.Client -> IO (Result [L.JobId])
+submitJobs opcodes client = do
+ jids <- L.submitManyJobs client opcodes
+ return (case jids of
+ Bad e -> Bad $ "Job submission error: " ++ formatError e
+ Ok jids' -> Ok jids')
+
-- | Executes a set of jobs and waits for their completion, returning their
-- status.
execJobsWait :: [[MetaOpCode]] -- ^ The list of jobs
-> L.Client -- ^ The Luxi client
-> IO (Result [(L.JobId, JobStatus)])
execJobsWait opcodes callback client = do
- jids <- L.submitManyJobs client opcodes
+ jids <- submitJobs opcodes client
case jids of
- Bad e -> return . Bad $ "Job submission error: " ++ formatError e
+ Bad e -> return $ Bad e
Ok jids' -> do
callback jids'
waitForJobs jids' client
--- | Polls a set of jobs at a fixed interval until all are finished
--- one way or another.
+-- | Polls a set of jobs at an increasing interval until all are finished one
+-- way or another.
waitForJobs :: [L.JobId] -> L.Client -> IO (Result [(L.JobId, JobStatus)])
-waitForJobs jids client = do
- sts <- L.queryJobsStatus client jids
+waitForJobs jids client = waitForJobs' 500000 15000000
+ where
+ waitForJobs' delay maxdelay = do
+ -- TODO: this should use WaitForJobChange once it's available in Haskell
+ -- land, instead of a fixed schedule of sleeping intervals.
+ threadDelay $ min delay maxdelay
+ sts <- L.queryJobsStatus client jids
+ case sts of
+ Bad e -> return . Bad $ "Checking job status: " ++ formatError e
+ Ok sts' -> if any (<= JOB_STATUS_RUNNING) sts' then
+ waitForJobs' (delay * 2) maxdelay
+ else
+ return . Ok $ zip jids sts'
+
+-- | Execute jobs and return @Ok@ only if all of them succeeded.
+execJobsWaitOk :: [[MetaOpCode]] -> L.Client -> IO (Result ())
+execJobsWaitOk opcodes client = do
+ let nullog = const (return () :: IO ())
+ failed = filter ((/=) JOB_STATUS_SUCCESS . snd)
+ fmtfail (i, s) = show (fromJobId i) ++ "=>" ++ jobStatusToRaw s
+ sts <- execJobsWait opcodes nullog client
case sts of
- Bad e -> return . Bad $ "Checking job status: " ++ formatError e
- Ok sts' -> if any (<= JOB_STATUS_RUNNING) sts'
- then do
- -- TODO: replace hardcoded value with a better thing
- threadDelay (1000000 * 15)
- waitForJobs jids client
- else return . Ok $ zip jids sts'
+ Bad e -> return $ Bad e
+ Ok sts' -> return (if null $ failed sts' then
+ Ok ()
+ else
+ Bad ("The following jobs failed: " ++
+ (intercalate ", " . map fmtfail $ failed sts')))
, optionalField $
simpleField "ext_reservations" [t| String |]
]
+ ++ uuidFields
++ serialFields
++ tagsFields)
instance TagsObject Network where
tagsOf = networkTags
+instance UuidObject Network where
+ uuidOf = networkUuid
+
-- * NIC definitions
$(buildParam "Nic" "nicp"
, simpleField "nodes" [t| Container Node |]
, simpleField "nodegroups" [t| Container NodeGroup |]
, simpleField "instances" [t| Container Instance |]
+ , simpleField "networks" [t| Container Network |]
]
++ serialFields)
( rsNoData
, rsUnavail
, rsNormal
- , rsMaybe
+ , rsMaybeNoData
+ , rsMaybeUnavail
, rsUnknown
, missingRuntime
, rpcErrorToStatus
-- missing, in which case we return no data). Note that there's some
-- ambiguity here: in some cases, we mean 'RSNoData', but in other
-- 'RSUnavail'; this is easy to solve in simple cases, but not in
--- nested dicts.
-rsMaybe :: (JSON a) => Maybe a -> ResultEntry
-rsMaybe = maybe rsNoData rsNormal
+-- nested dicts. If you want to return 'RSUnavail' in case of 'Nothing'
+-- use the function 'rsMaybeUnavail'.
+rsMaybeNoData :: (JSON a) => Maybe a -> ResultEntry
+rsMaybeNoData = maybe rsNoData rsNormal
+
+-- | Helper to declare a result from a 'Maybe'. This version returns
+-- a 'RSUnavail' in case of 'Nothing'. It should be used for optional
+-- fields that are not set. For cases where 'Nothing' means that there
+-- was an error, consider using 'rsMaybeNoData' instead.
+rsMaybeUnavail :: (JSON a) => Maybe a -> ResultEntry
+rsMaybeUnavail = maybe rsUnavail rsNormal
-- | Helper for unknown field result.
rsUnknown :: ResultEntry
-- levels of maybe: the top level dict might be missing, or one key in
-- the dictionary might be.
dictFieldGetter :: (DictObject a) => String -> Maybe a -> ResultEntry
-dictFieldGetter k = maybe rsNoData (rsMaybe . lookup k . toDict)
+dictFieldGetter k = maybe rsNoData (rsMaybeNoData . lookup k . toDict)
-- | Build an optimised lookup map from a Python _PARAMETER_TYPES
-- association list.
, ("QRGroup", 'C.qrGroup )
, ("QROs", 'C.qrOs )
, ("QRExport", 'C.qrExport )
+ , ("QRNetwork", 'C.qrNetwork )
])
$(makeJSONInstance ''QueryTypeOp)
--- /dev/null
+{-| Implementation of the Ganeti Query2 network queries.
+
+ -}
+
+{-
+
+Copyright (C) 2012 Google Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+
+-}
+
+module Ganeti.Query.Network
+ ( NetworkRuntime(..)
+ , networkFieldsMap
+ ) where
+
+import qualified Data.Map as Map
+import Data.Maybe (fromMaybe, mapMaybe)
+import Data.List (find)
+
+import Ganeti.JSON
+import Ganeti.Network
+import Ganeti.Objects
+import Ganeti.Query.Language
+import Ganeti.Query.Common
+import Ganeti.Query.Types
+import Ganeti.Types
+
+data NetworkRuntime = NetworkRuntime
+
+-- | The network's attribute fields, and how each is computed from a
+-- 'Network' object (the group and instance fields additionally need
+-- the cluster configuration). Optional simple attributes are reported
+-- as unavailable when unset; address-pool derived fields report "no
+-- data" when the pool cannot be created.
+networkFields :: FieldList Network NetworkRuntime
+networkFields =
+ [ (FieldDefinition "name" "Name" QFTText "Network name",
+ FieldSimple (rsNormal . networkName), QffNormal)
+ , (FieldDefinition "network" "Subnet" QFTText "IPv4 subnet",
+ FieldSimple (rsNormal . networkNetwork), QffNormal)
+ , (FieldDefinition "gateway" "Gateway" QFTOther "IPv4 gateway",
+ FieldSimple (rsMaybeUnavail . networkGateway), QffNormal)
+ , (FieldDefinition "network6" "IPv6Subnet" QFTOther "IPv6 subnet",
+ FieldSimple (rsMaybeUnavail . networkNetwork6), QffNormal)
+ , (FieldDefinition "gateway6" "IPv6Gateway" QFTOther "IPv6 gateway",
+ FieldSimple (rsMaybeUnavail . networkGateway6), QffNormal)
+ , (FieldDefinition "mac_prefix" "MacPrefix" QFTOther "MAC address prefix",
+ FieldSimple (rsMaybeUnavail . networkMacPrefix), QffNormal)
+ , (FieldDefinition "free_count" "FreeCount" QFTOther "Number of free IPs",
+ FieldSimple (rsMaybeNoData . fmap getFreeCount . createAddressPool),
+ QffNormal)
+ , (FieldDefinition "map" "Map" QFTText "Map of the network's reserved IPs",
+ FieldSimple (rsMaybeNoData . fmap getMap . createAddressPool),
+ QffNormal)
+ , (FieldDefinition "reserved_count" "ReservedCount" QFTOther
+ "Number of reserved IPs",
+ FieldSimple (rsMaybeNoData . fmap getReservedCount . createAddressPool),
+ QffNormal)
+ , (FieldDefinition "group_list" "GroupList" QFTOther "List of node groups",
+ FieldConfig (\cfg -> rsNormal . getGroupConnections cfg . networkUuid),
+ QffNormal)
+ , (FieldDefinition "group_cnt" "GroupCount" QFTOther "Number of node groups",
+ FieldConfig (\cfg -> rsNormal . length . getGroupConnections cfg
+ . networkUuid), QffNormal)
+ , (FieldDefinition "inst_list" "InstanceList" QFTOther "List of instances",
+ FieldConfig (\cfg -> rsNormal . getInstances cfg . networkUuid),
+ QffNormal)
+ , (FieldDefinition "inst_cnt" "InstanceCount" QFTOther "Number of instances",
+ FieldConfig (\cfg -> rsNormal . length . getInstances cfg
+ . networkUuid), QffNormal)
+ ] ++
+ uuidFields "Network" ++
+ serialFields "Network" ++
+ tagsFields
+
+-- | The network fields map, keyed on field name.
+networkFieldsMap :: FieldMap Network NetworkRuntime
+networkFieldsMap =
+ Map.fromList $ map (\v@(f, _, _) -> (fdefName f, v)) networkFields
+
+-- TODO: the external_reservations field is not implemented yet
+
+-- | Given a network's UUID, this function lists all connections from
+-- the network to nodegroups including the respective mode and links.
+getGroupConnections :: ConfigData -> String -> [(String, String, String)]
+getGroupConnections cfg network_uuid =
+ mapMaybe (getGroupConnection network_uuid)
+ ((Map.elems . fromContainer . configNodegroups) cfg)
+
+-- | Given a network's UUID and a node group, this function assembles
+-- a tuple of the group's name, the mode and the link by which the
+-- network is connected to the group. Returns 'Nothing' if the network
+-- is not connected to the group.
+getGroupConnection :: String -> NodeGroup -> Maybe (String, String, String)
+getGroupConnection network_uuid group =
+ let networks = fromContainer . groupNetworks $ group
+ in case Map.lookup network_uuid networks of
+ Nothing -> Nothing
+ Just net ->
+ Just (groupName group, getNicMode net, getNicLink net)
+
+-- | Retrieves the network's mode and formats it human-readable,
+-- also in case it is not available.
+getNicMode :: PartialNicParams -> String
+getNicMode nic_params =
+ maybe "-" nICModeToRaw $ nicpModeP nic_params
+
+-- | Retrieves the network's link and formats it human-readable, also in
+-- case it is not available ("-" is used then).
+getNicLink :: PartialNicParams -> String
+getNicLink nic_params = fromMaybe "-" (nicpLinkP nic_params)
+
+-- | Retrieves the network's instances' names.
+getInstances :: ConfigData -> String -> [String]
+getInstances cfg network_uuid =
+ map instName (filter (instIsConnected cfg network_uuid)
+ ((Map.elems . fromContainer . configInstances) cfg))
+
+-- | Helper function that checks if an instance is linked to the given network.
+instIsConnected :: ConfigData -> String -> Instance -> Bool
+instIsConnected cfg network_uuid inst =
+ network_uuid `elem` mapMaybe (getNetworkUuid cfg)
+ (mapMaybe nicNetwork (instNics inst))
+
+-- | Helper function to look up a network's UUID by its name.
+--
+-- Returns 'Nothing' if no network with the given name exists in the
+-- configuration.
+getNetworkUuid :: ConfigData -> String -> Maybe String
+getNetworkUuid cfg name =
+ let net = find (\n -> name == fromNonEmpty (networkName n))
+ ((Map.elems . fromContainer . configNetworks) cfg)
+ in fmap networkUuid net
QffNormal)
, (FieldDefinition "group" "Group" QFTText "Node group",
FieldConfig (\cfg node ->
- rsMaybe (groupName <$> getGroupOfNode cfg node)),
+ rsMaybeNoData (groupName <$> getGroupOfNode cfg node)),
QffNormal)
, (FieldDefinition "group.uuid" "GroupUUID" QFTText "UUID of node group",
FieldSimple (rsNormal . nodeGroup), QffNormal)
, (FieldDefinition "ndparams" "NodeParameters" QFTOther
"Merged node parameters",
- FieldConfig ((rsMaybe .) . getNodeNdParams), QffNormal)
+ FieldConfig ((rsMaybeNoData .) . getNodeNdParams), QffNormal)
, (FieldDefinition "custom_ndparams" "CustomNodeParameters" QFTOther
"Custom node parameters",
FieldSimple (rsNormal . nodeNdparams), QffNormal)
import qualified Ganeti.Query.Job as Query.Job
import Ganeti.Query.Group
import Ganeti.Query.Language
+import Ganeti.Query.Network
import Ganeti.Query.Node
import Ganeti.Query.Types
import Ganeti.Path
fgroups <- filterM (\n -> evaluateFilter cfg Nothing n cfilter) groups
let fdata = map (\node ->
map (execGetter cfg GroupRuntime node) fgetters) fgroups
- return QueryResult {qresFields = fdefs, qresData = fdata }
+ return QueryResult { qresFields = fdefs, qresData = fdata }
+
+queryInner cfg _ (Query (ItemTypeOpCode QRNetwork) fields qfilter) wanted =
+ return $ do
+ cfilter <- compileFilter networkFieldsMap qfilter
+ let selected = getSelectedFields networkFieldsMap fields
+ (fdefs, fgetters, _) = unzip3 selected
+ networks <- case wanted of
+ [] -> Ok . niceSortKey (fromNonEmpty . networkName) .
+ Map.elems . fromContainer $ configNetworks cfg
+ _ -> mapM (getNetwork cfg) wanted
+ fnetworks <- filterM (\n -> evaluateFilter cfg Nothing n cfilter) networks
+ let fdata = map (\network ->
+ map (execGetter cfg NetworkRuntime network) fgetters)
+ fnetworks
+ return QueryResult { qresFields = fdefs, qresData = fdata }
queryInner _ _ (Query qkind _ _) _ =
return . Bad . GenericError $ "Query '" ++ show qkind ++ "' not supported"
])
$(THH.makeJSONInstance ''IAllocatorMode)
--- | Netork mode.
+-- | Network mode.
$(THH.declareSADT "NICMode"
[ ("NMBridged", 'C.nicModeBridged)
, ("NMRouted", 'C.nicModeRouted)
(which is assumed to be a separator) to be absent from the string if the string
terminates there.
->>> chompPrefix "foo:bar:" "a:b:c"
+\>>> chompPrefix \"foo:bar:\" \"a:b:c\"
Nothing
->>> chompPrefix "foo:bar:" "foo:bar:baz"
-Just "baz"
+\>>> chompPrefix \"foo:bar:\" \"foo:bar:baz\"
+Just \"baz\"
->>> chompPrefix "foo:bar:" "foo:bar:"
-Just ""
+\>>> chompPrefix \"foo:bar:\" \"foo:bar:\"
+Just \"\"
->>> chompPrefix "foo:bar:" "foo:bar"
-Just ""
+\>>> chompPrefix \"foo:bar:\" \"foo:bar\"
+Just \"\"
->>> chompPrefix "foo:bar:" "foo:barbaz"
+\>>> chompPrefix \"foo:bar:\" \"foo:barbaz\"
Nothing
-}
chompPrefix :: String -> String -> Maybe String
--- /dev/null
+{
+ "name": "xen-test-qa-minimal-nodes-instances-only",
+
+ "disk": ["1G", "512M"],
+ "disk-growth": ["2G", "768M"],
+
+ "nodes": [
+ {
+ "# Master node": null,
+ "primary": "xen-test-0",
+ "secondary": "192.0.2.1"
+ },
+
+ {
+ "primary": "xen-test-1",
+ "secondary": "192.0.2.2"
+ },
+
+ {
+ "primary": "xen-test-2",
+ "secondary": "192.0.2.3"
+ },
+
+ {
+ "primary": "xen-test-3",
+ "secondary": "192.0.2.4"
+ }
+ ],
+
+ "instances": [
+ {
+ "name": "xen-test-inst1",
+ "nic.mac/0": "AA:00:00:11:11:11"
+ },
+ {
+ "name": "xen-test-inst2",
+ "nic.mac/0": "AA:00:00:22:22:22"
+ }
+ ],
+
+ "tests": {
+ "default": false
+ },
+
+ "# vim: set syntax=javascript :": null
+}
--- /dev/null
+host : host.example.com
+release : 3.2.0
+version : #1 SMP Tue Jan 1 00:00:00 UTC 2013
+machine : x86_64
+nr_cpus : 4
+nr_nodes : 1
+cores_per_socket : 2
+threads_per_core : 1
+cpu_mhz : 2800
+hw_caps : bfebfbff:20100800:00000000:00000940:0004e3bd:00000000:00000001:00000000
+virt_caps :
+total_memory : 16378
+free_memory : 8004
+node_to_cpu : node0:0-3
+node_to_memory : node0:8004
+node_to_dma32_mem : node0:2985
+max_node_id : 0
+xen_major : 4
+xen_minor : 0
+xen_extra : .1
+xen_caps : xen-3.0-x86_64 xen-3.0-x86_32p
+xen_scheduler : credit
+xen_pagesize : 4096
+platform_params : virt_start=0xffff800000000000
+xen_changeset : unavailable
+xen_commandline : placeholder dom0_mem=1024M com1=115200,8n1 console=com1
+cc_compiler : gcc version 4.4.5 (Debian 4.4.5-8)
+cc_compile_by : user
+cc_compile_domain : example.com
+cc_compile_date : Tue Jan 1 00:00:00 UTC 2013
+xend_config_format : 4
--- /dev/null
+Name ID Mem VCPUs State Time(s)
+Domain-0 0 1023 1 r----- 121152.6
--- /dev/null
+Name ID Mem VCPUs State Time(s)
+Domain-0 0 1023 1 r----- 154706.1
+server01.example.com 1 1024 1 -b---- 167643.2
+web3106215069.example.com 3 4096 1 -b---- 466690.9
+testinstance.example.com 2 2048 2 r----- 244443.0
[ " hail - Ganeti IAllocator plugin that implements the instance\
\ placement and"
, " movement using the same algorithm as hbal(1)"
+ , " harep - auto-repair tool that detects certain kind of problems\
+ \ with instances"
+ , " and applies the allowed set of solutions"
, " hbal - cluster balancer that looks at the current state of\
\ the cluster and"
, " computes a series of steps designed to bring the\
gateway6 <- genMaybe genIp6Addr
res <- liftM Just (genBitString $ netmask2NumHosts netmask)
ext_res <- liftM Just (genBitString $ netmask2NumHosts netmask)
+ uuid <- arbitrary
let n = Network name mac_prefix net net6 gateway
- gateway6 res ext_res 0 Set.empty
+ gateway6 res ext_res uuid 0 Set.empty
return n
-- | Generate an arbitrary string consisting of '0' and '1' of the given length.
show (map fst nodes'))
else GenericContainer nodemap
continsts = GenericContainer Map.empty
+ networks = GenericContainer Map.empty
grp <- arbitrary
let contgroups = GenericContainer $ Map.singleton guuid grp
serial <- arbitrary
cluster <- resize 8 arbitrary
- let c = ConfigData version cluster contnodes contgroups continsts serial
+ let c = ConfigData version cluster contnodes contgroups continsts networks
+ serial
return c
-- * Test properties
self.assertFalse(constants.JOBS_PENDING - constants.JOB_STATUS_ALL)
self.assertFalse(constants.JOBS_FINALIZED - constants.JOB_STATUS_ALL)
+ def testDefaultsForAllHypervisors(self):
+ self.assertEqual(frozenset(constants.HVC_DEFAULTS.keys()),
+ constants.HYPER_TYPES)
+
+ def testDefaultHypervisor(self):
+ self.assertTrue(constants.DEFAULT_ENABLED_HYPERVISOR in
+ constants.HYPER_TYPES)
+
class TestExportedNames(unittest.TestCase):
_VALID_NAME_RE = re.compile(r"^[A-Z][A-Z0-9_]+$")
import unittest
import socket
import os
+import struct
from ganeti import serializer
from ganeti import constants
self.assertFalse(boot_re.search(help_01590))
+# Tests for hv_kvm._GetTunFeatures, which queries a tap device's feature
+# flags via the TUNGETFEATURES ioctl (and returns None when the ioctl
+# fails, e.g. on a plain file).
+class TestGetTunFeatures(unittest.TestCase):
+ def testWrongIoctl(self):
+ tmpfile = tempfile.NamedTemporaryFile()
+ # A file does not have the right ioctls, so this must always fail
+ result = hv_kvm._GetTunFeatures(tmpfile.fileno())
+ self.assertTrue(result is None)
+
+ def _FakeIoctl(self, features, fd, request, buf):
+ self.assertEqual(request, hv_kvm.TUNGETFEATURES)
+
+ (reqno, ) = struct.unpack("I", buf)
+ self.assertEqual(reqno, 0)
+
+ return struct.pack("I", features)
+
+ def test(self):
+ tmpfile = tempfile.NamedTemporaryFile()
+ fd = tmpfile.fileno()
+
+ for features in [0, hv_kvm.IFF_VNET_HDR]:
+ fn = compat.partial(self._FakeIoctl, features)
+ result = hv_kvm._GetTunFeatures(fd, _ioctl=fn)
+ self.assertEqual(result, features)
+
+
+# Tests for hv_kvm._ProbeTapVnetHdr, which reports whether a tap device
+# supports IFF_VNET_HDR; it must be false both when the flag is unset
+# and when the feature query returns no data.
+class TestProbeTapVnetHdr(unittest.TestCase):
+ def _FakeTunFeatures(self, expected_fd, flags, fd):
+ self.assertEqual(fd, expected_fd)
+ return flags
+
+ def test(self):
+ tmpfile = tempfile.NamedTemporaryFile()
+ fd = tmpfile.fileno()
+
+ for flags in [0, hv_kvm.IFF_VNET_HDR]:
+ fn = compat.partial(self._FakeTunFeatures, fd, flags)
+
+ result = hv_kvm._ProbeTapVnetHdr(fd, _features_fn=fn)
+ if flags == 0:
+ self.assertFalse(result)
+ else:
+ self.assertTrue(result)
+
+ def testUnsupported(self):
+ tmpfile = tempfile.NamedTemporaryFile()
+ fd = tmpfile.fileno()
+
+ self.assertFalse(hv_kvm._ProbeTapVnetHdr(fd, _features_fn=lambda _: None))
+
+
if __name__ == "__main__":
testutils.GanetiTestProgram()
#!/usr/bin/python
#
-# Copyright (C) 2011 Google Inc.
+# Copyright (C) 2011, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
"""Script for testing ganeti.hypervisor.hv_lxc"""
+import string # pylint: disable=W0402
import unittest
+import tempfile
+import shutil
+import random
+import os
from ganeti import constants
from ganeti import objects
from ganeti import hypervisor
+from ganeti import utils
+from ganeti import errors
+from ganeti import compat
from ganeti.hypervisor import hv_xen
import testutils
+# Map from hypervisor class to hypervisor name
+HVCLASS_TO_HVNAME = utils.InvertDict(hypervisor._HYPERVISOR_MAP)
+
+
class TestConsole(unittest.TestCase):
def test(self):
for cls in [hv_xen.XenPvmHypervisor, hv_xen.XenHvmHypervisor]:
constants.CPU_PINNING_ALL_XEN))
+class TestParseXmList(testutils.GanetiTestCase):
+ # Tests for hv_xen._ParseXmList: parsing "xm list" output into
+ # (name, id, memory, vcpus, state, time) tuples, with and without Dom0
+ def test(self):
+ data = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt")
+
+ # Exclude node
+ self.assertEqual(hv_xen._ParseXmList(data.splitlines(), False), [])
+
+ # Include node
+ result = hv_xen._ParseXmList(data.splitlines(), True)
+ self.assertEqual(len(result), 1)
+ self.assertEqual(len(result[0]), 6)
+
+ # Name
+ self.assertEqual(result[0][0], hv_xen._DOM0_NAME)
+
+ # ID
+ self.assertEqual(result[0][1], 0)
+
+ # Memory
+ self.assertEqual(result[0][2], 1023)
+
+ # VCPUs
+ self.assertEqual(result[0][3], 1)
+
+ # State
+ self.assertEqual(result[0][4], "r-----")
+
+ # Time
+ self.assertAlmostEqual(result[0][5], 121152.6)
+
+ def testWrongLineFormat(self):
+ # Malformed lines (wrong field count, non-numeric ID) must raise
+ # HypervisorError rather than being silently skipped
+ tests = [
+ ["three fields only"],
+ ["name InvalidID 128 1 r----- 12345"],
+ ]
+
+ for lines in tests:
+ try:
+ hv_xen._ParseXmList(["Header would be here"] + lines, False)
+ except errors.HypervisorError, err:
+ self.assertTrue("Can't parse output of xm list" in str(err))
+ else:
+ self.fail("Exception was not raised")
+
+
+class TestGetXmList(testutils.GanetiTestCase):
+ # Tests for hv_xen._GetXmList: retry-with-timeout behaviour around the
+ # "xm list" command, using CallCounter to bound the number of attempts
+ def _Fail(self):
+ return utils.RunResult(constants.EXIT_FAILURE, None,
+ "stdout", "stderr", None,
+ NotImplemented, NotImplemented)
+
+ def testTimeout(self):
+ fn = testutils.CallCounter(self._Fail)
+ try:
+ hv_xen._GetXmList(fn, False, _timeout=0.1)
+ except errors.HypervisorError, err:
+ self.assertTrue("timeout exceeded" in str(err))
+ else:
+ self.fail("Exception was not raised")
+
+ # With a 0.1s timeout the command must not be retried excessively
+ self.assertTrue(fn.Count() < 10,
+ msg="'xm list' was called too many times")
+
+ def _Success(self, stdout):
+ return utils.RunResult(constants.EXIT_SUCCESS, None, stdout, "", None,
+ NotImplemented, NotImplemented)
+
+ def testSuccess(self):
+ data = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt")
+
+ fn = testutils.CallCounter(compat.partial(self._Success, data))
+
+ result = hv_xen._GetXmList(fn, True, _timeout=0.1)
+
+ self.assertEqual(len(result), 4)
+
+ self.assertEqual(map(compat.fst, result), [
+ "Domain-0",
+ "server01.example.com",
+ "web3106215069.example.com",
+ "testinstance.example.com",
+ ])
+
+ # A successful command must be run exactly once, without retries
+ self.assertEqual(fn.Count(), 1)
+
+
+class TestParseNodeInfo(testutils.GanetiTestCase):
+ # Tests for hv_xen._ParseNodeInfo: "xm info" output is parsed into a
+ # dict; unknown or empty input yields an empty dict, not an error
+ def testEmpty(self):
+ self.assertEqual(hv_xen._ParseNodeInfo(""), {})
+
+ def testUnknownInput(self):
+ data = "\n".join([
+ "foo bar",
+ "something else goes",
+ "here",
+ ])
+ self.assertEqual(hv_xen._ParseNodeInfo(data), {})
+
+ def testBasicInfo(self):
+ data = testutils.ReadTestData("xen-xm-info-4.0.1.txt")
+ result = hv_xen._ParseNodeInfo(data)
+ self.assertEqual(result, {
+ "cpu_nodes": 1,
+ "cpu_sockets": 2,
+ "cpu_total": 4,
+ "hv_version": (4, 0),
+ "memory_free": 8004,
+ "memory_total": 16378,
+ })
+
+
+class TestMergeInstanceInfo(testutils.GanetiTestCase):
+ # Tests for hv_xen._MergeInstanceInfo (and, via testWithNodeInfo, the
+ # _GetNodeInfo wrapper): Dom0 memory/CPU data from "xm list" is merged
+ # into the node-info dict
+ def testEmpty(self):
+ self.assertEqual(hv_xen._MergeInstanceInfo({}, lambda _: []), {})
+
+ def _FakeXmList(self, include_node):
+ self.assertTrue(include_node)
+ return [
+ (hv_xen._DOM0_NAME, NotImplemented, 4096, 7, NotImplemented,
+ NotImplemented),
+ ("inst1.example.com", NotImplemented, 2048, 4, NotImplemented,
+ NotImplemented),
+ ]
+
+ def testMissingNodeInfo(self):
+ result = hv_xen._MergeInstanceInfo({}, self._FakeXmList)
+ self.assertEqual(result, {
+ "memory_dom0": 4096,
+ "dom0_cpus": 7,
+ })
+
+ def testWithNodeInfo(self):
+ info = testutils.ReadTestData("xen-xm-info-4.0.1.txt")
+ result = hv_xen._GetNodeInfo(info, self._FakeXmList)
+ # memory_hv is what remains after subtracting Dom0 and instance memory
+ self.assertEqual(result, {
+ "cpu_nodes": 1,
+ "cpu_sockets": 2,
+ "cpu_total": 4,
+ "dom0_cpus": 7,
+ "hv_version": (4, 0),
+ "memory_dom0": 4096,
+ "memory_free": 8004,
+ "memory_hv": 2230,
+ "memory_total": 16378,
+ })
+
+
+class TestGetConfigFileDiskData(unittest.TestCase):
+ # Tests for hv_xen._GetConfigFileDiskData: building Xen config-file disk
+ # specifications ("phy:", "file:", "tap:aio:") from (Disk, path) pairs
+ def testLetterCount(self):
+ self.assertEqual(len(hv_xen._DISK_LETTERS), 26)
+
+ def testNoDisks(self):
+ self.assertEqual(hv_xen._GetConfigFileDiskData([], "hd"), [])
+
+ def testManyDisks(self):
+ # More disks than available device letters must raise HypervisorError
+ for offset in [0, 1, 10]:
+ disks = [(objects.Disk(dev_type=constants.LD_LV), "/tmp/disk/%s" % idx)
+ for idx in range(len(hv_xen._DISK_LETTERS) + offset)]
+
+ if offset == 0:
+ result = hv_xen._GetConfigFileDiskData(disks, "hd")
+ self.assertEqual(result, [
+ "'phy:/tmp/disk/%s,hd%s,r'" % (idx, string.ascii_lowercase[idx])
+ for idx in range(len(hv_xen._DISK_LETTERS) + offset)
+ ])
+ else:
+ try:
+ hv_xen._GetConfigFileDiskData(disks, "hd")
+ except errors.HypervisorError, err:
+ self.assertEqual(str(err), "Too many disks")
+ else:
+ self.fail("Exception was not raised")
+
+ def testTwoLvDisksWithMode(self):
+ # DISK_RDWR maps to mode "w", DISK_RDONLY to "r"
+ disks = [
+ (objects.Disk(dev_type=constants.LD_LV, mode=constants.DISK_RDWR),
+ "/tmp/diskFirst"),
+ (objects.Disk(dev_type=constants.LD_LV, mode=constants.DISK_RDONLY),
+ "/tmp/diskLast"),
+ ]
+
+ result = hv_xen._GetConfigFileDiskData(disks, "hd")
+ self.assertEqual(result, [
+ "'phy:/tmp/diskFirst,hda,w'",
+ "'phy:/tmp/diskLast,hdb,r'",
+ ])
+
+ def testFileDisks(self):
+ # File-based disks select their prefix from the driver in physical_id
+ disks = [
+ (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDWR,
+ physical_id=[constants.FD_LOOP]),
+ "/tmp/diskFirst"),
+ (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDONLY,
+ physical_id=[constants.FD_BLKTAP]),
+ "/tmp/diskTwo"),
+ (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDWR,
+ physical_id=[constants.FD_LOOP]),
+ "/tmp/diskThree"),
+ (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDWR,
+ physical_id=[constants.FD_BLKTAP]),
+ "/tmp/diskLast"),
+ ]
+
+ result = hv_xen._GetConfigFileDiskData(disks, "sd")
+ self.assertEqual(result, [
+ "'file:/tmp/diskFirst,sda,w'",
+ "'tap:aio:/tmp/diskTwo,sdb,r'",
+ "'file:/tmp/diskThree,sdc,w'",
+ "'tap:aio:/tmp/diskLast,sdd,w'",
+ ])
+
+ def testInvalidFileDisk(self):
+ # An unknown file driver is a programming error; KeyError is expected
+ disks = [
+ (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDWR,
+ physical_id=["#unknown#"]),
+ "/tmp/diskinvalid"),
+ ]
+
+ self.assertRaises(KeyError, hv_xen._GetConfigFileDiskData, disks, "sd")
+
+
+class TestXenHypervisorUnknownCommand(unittest.TestCase):
+ # Running an unknown Xen command must be rejected with ProgrammerError
+ # before any external process is started
+ def test(self):
+ cmd = "#unknown command#"
+ self.assertFalse(cmd in constants.KNOWN_XEN_COMMANDS)
+ hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented,
+ _run_cmd_fn=NotImplemented,
+ _cmd=cmd)
+ self.assertRaises(errors.ProgrammerError, hv._RunXen, [])
+
+
+class TestXenHypervisorWriteConfigFile(unittest.TestCase):
+ # Writing an instance config into a non-existent directory must be
+ # reported as HypervisorError, not leak the underlying OS error
+ def setUp(self):
+ self.tmpdir = tempfile.mkdtemp()
+
+ def tearDown(self):
+ shutil.rmtree(self.tmpdir)
+
+ def testWriteError(self):
+ # Deliberately point at a directory that was never created
+ cfgdir = utils.PathJoin(self.tmpdir, "foobar")
+
+ hv = hv_xen.XenHypervisor(_cfgdir=cfgdir,
+ _run_cmd_fn=NotImplemented,
+ _cmd=NotImplemented)
+
+ self.assertFalse(os.path.exists(cfgdir))
+
+ try:
+ hv._WriteConfigFile("name", "data")
+ except errors.HypervisorError, err:
+ self.assertTrue(str(err).startswith("Cannot write Xen instance"))
+ else:
+ self.fail("Exception was not raised")
+
+
+class _TestXenHypervisor(object):
+ # Mix-in carrying the actual test methods; it is deliberately not derived
+ # from unittest.TestCase so it is not collected on its own. _MakeTestClass
+ # combines it with TestCase and fills in the three attributes below.
+ TARGET = NotImplemented
+ CMD = NotImplemented
+ HVNAME = NotImplemented
+
+ def setUp(self):
+ super(_TestXenHypervisor, self).setUp()
+
+ self.tmpdir = tempfile.mkdtemp()
+
+ # Random VNC password written to a file, as the hypervisor expects
+ self.vncpw = "".join(random.sample(string.ascii_letters, 10))
+
+ self.vncpw_path = utils.PathJoin(self.tmpdir, "vncpw")
+ utils.WriteFile(self.vncpw_path, data=self.vncpw)
+
+ def tearDown(self):
+ super(_TestXenHypervisor, self).tearDown()
+
+ shutil.rmtree(self.tmpdir)
+
+ def _GetHv(self, run_cmd=NotImplemented):
+ return self.TARGET(_cfgdir=self.tmpdir, _run_cmd_fn=run_cmd, _cmd=self.CMD)
+
+ # Fakes for the command-runner hook: each verifies the command name and
+ # returns a canned utils.RunResult
+ def _SuccessCommand(self, stdout, cmd):
+ self.assertEqual(cmd[0], self.CMD)
+
+ return utils.RunResult(constants.EXIT_SUCCESS, None, stdout, "", None,
+ NotImplemented, NotImplemented)
+
+ def _FailingCommand(self, cmd):
+ self.assertEqual(cmd[0], self.CMD)
+
+ return utils.RunResult(constants.EXIT_FAILURE, None,
+ "", "This command failed", None,
+ NotImplemented, NotImplemented)
+
+ def _FakeTcpPing(self, expected, result, target, port, **kwargs):
+ self.assertEqual((target, port), expected)
+ return result
+
+ def testReadingNonExistentConfigFile(self):
+ hv = self._GetHv()
+
+ try:
+ hv._ReadConfigFile("inst15780.example.com")
+ except errors.HypervisorError, err:
+ self.assertTrue(str(err).startswith("Failed to load Xen config file:"))
+ else:
+ self.fail("Exception was not raised")
+
+ def testRemovingAutoConfigFile(self):
+ name = "inst8206.example.com"
+ cfgfile = utils.PathJoin(self.tmpdir, name)
+ autodir = utils.PathJoin(self.tmpdir, "auto")
+ autocfgfile = utils.PathJoin(autodir, name)
+
+ os.mkdir(autodir)
+
+ utils.WriteFile(autocfgfile, data="")
+
+ hv = self._GetHv()
+
+ # Writing the real config must remove the stale auto-start link
+ self.assertTrue(os.path.isfile(autocfgfile))
+ hv._WriteConfigFile(name, "content")
+ self.assertFalse(os.path.exists(autocfgfile))
+ self.assertEqual(utils.ReadFile(cfgfile), "content")
+
+ def _XenList(self, cmd):
+ self.assertEqual(cmd, [self.CMD, "list"])
+
+ # TODO: Use actual data from "xl" command
+ output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt")
+
+ return self._SuccessCommand(output, cmd)
+
+ def testGetInstanceInfo(self):
+ hv = self._GetHv(run_cmd=self._XenList)
+
+ (name, instid, memory, vcpus, state, runtime) = \
+ hv.GetInstanceInfo("server01.example.com")
+
+ self.assertEqual(name, "server01.example.com")
+ self.assertEqual(instid, 1)
+ self.assertEqual(memory, 1024)
+ self.assertEqual(vcpus, 1)
+ self.assertEqual(state, "-b----")
+ self.assertAlmostEqual(runtime, 167643.2)
+
+ def testGetInstanceInfoDom0(self):
+ hv = self._GetHv(run_cmd=self._XenList)
+
+ # TODO: Not sure if this is actually used anywhere (can't find it), but the
+ # code supports querying for Dom0
+ (name, instid, memory, vcpus, state, runtime) = \
+ hv.GetInstanceInfo(hv_xen._DOM0_NAME)
+
+ self.assertEqual(name, "Domain-0")
+ self.assertEqual(instid, 0)
+ self.assertEqual(memory, 1023)
+ self.assertEqual(vcpus, 1)
+ self.assertEqual(state, "r-----")
+ self.assertAlmostEqual(runtime, 154706.1)
+
+ def testGetInstanceInfoUnknown(self):
+ hv = self._GetHv(run_cmd=self._XenList)
+
+ result = hv.GetInstanceInfo("unknown.example.com")
+ self.assertTrue(result is None)
+
+ def testGetAllInstancesInfo(self):
+ hv = self._GetHv(run_cmd=self._XenList)
+
+ result = hv.GetAllInstancesInfo()
+
+ self.assertEqual(map(compat.fst, result), [
+ "server01.example.com",
+ "web3106215069.example.com",
+ "testinstance.example.com",
+ ])
+
+ def testListInstances(self):
+ hv = self._GetHv(run_cmd=self._XenList)
+
+ self.assertEqual(hv.ListInstances(), [
+ "server01.example.com",
+ "web3106215069.example.com",
+ "testinstance.example.com",
+ ])
+
+ def testVerify(self):
+ output = testutils.ReadTestData("xen-xm-info-4.0.1.txt")
+ hv = self._GetHv(run_cmd=compat.partial(self._SuccessCommand,
+ output))
+ self.assertTrue(hv.Verify() is None)
+
+ def testVerifyFailing(self):
+ hv = self._GetHv(run_cmd=self._FailingCommand)
+ self.assertTrue("failed:" in hv.Verify())
+
+ # Fake handling "info", "list" and "create" during instance start; the
+ # "create" invocation can be made to fail on demand
+ def _StartInstanceCommand(self, inst, paused, failcreate, cmd):
+ if cmd == [self.CMD, "info"]:
+ output = testutils.ReadTestData("xen-xm-info-4.0.1.txt")
+ elif cmd == [self.CMD, "list"]:
+ output = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt")
+ elif cmd[:2] == [self.CMD, "create"]:
+ args = cmd[2:]
+ cfgfile = utils.PathJoin(self.tmpdir, inst.name)
+
+ if paused:
+ self.assertEqual(args, ["-p", cfgfile])
+ else:
+ self.assertEqual(args, [cfgfile])
+
+ if failcreate:
+ return self._FailingCommand(cmd)
+
+ output = ""
+ else:
+ self.fail("Unhandled command: %s" % (cmd, ))
+
+ return self._SuccessCommand(output, cmd)
+
+ def _MakeInstance(self):
+ # Copy default parameters
+ bep = objects.FillDict(constants.BEC_DEFAULTS, {})
+ hvp = objects.FillDict(constants.HVC_DEFAULTS[self.HVNAME], {})
+
+ # Override default VNC password file path
+ if constants.HV_VNC_PASSWORD_FILE in hvp:
+ hvp[constants.HV_VNC_PASSWORD_FILE] = self.vncpw_path
+
+ disks = [
+ (objects.Disk(dev_type=constants.LD_LV, mode=constants.DISK_RDWR),
+ utils.PathJoin(self.tmpdir, "disk0")),
+ (objects.Disk(dev_type=constants.LD_LV, mode=constants.DISK_RDONLY),
+ utils.PathJoin(self.tmpdir, "disk1")),
+ ]
+
+ inst = objects.Instance(name="server01.example.com",
+ hvparams=hvp, beparams=bep,
+ osparams={}, nics=[], os="deb1",
+ disks=map(compat.fst, disks))
+ inst.UpgradeConfig()
+
+ return (inst, disks)
+
+ def testStartInstance(self):
+ (inst, disks) = self._MakeInstance()
+
+ for failcreate in [False, True]:
+ for paused in [False, True]:
+ run_cmd = compat.partial(self._StartInstanceCommand,
+ inst, paused, failcreate)
+
+ hv = self._GetHv(run_cmd=run_cmd)
+
+ # Ensure instance is not listed
+ self.assertTrue(inst.name not in hv.ListInstances())
+
+ # Remove configuration
+ cfgfile = utils.PathJoin(self.tmpdir, inst.name)
+ utils.RemoveFile(cfgfile)
+
+ if failcreate:
+ self.assertRaises(errors.HypervisorError, hv.StartInstance,
+ inst, disks, paused)
+ else:
+ hv.StartInstance(inst, disks, paused)
+
+ # Check if configuration was updated
+ lines = utils.ReadFile(cfgfile).splitlines()
+
+ if constants.HV_VNC_PASSWORD_FILE in inst.hvparams:
+ self.assertTrue(("vncpasswd = '%s'" % self.vncpw) in lines)
+ else:
+ extra = inst.hvparams[constants.HV_KERNEL_ARGS]
+ self.assertTrue(("extra = '%s'" % extra) in lines)
+
+ # Fake for stopping: "destroy" when forced, "shutdown" otherwise
+ def _StopInstanceCommand(self, instance_name, force, fail, cmd):
+ if ((force and cmd[:2] == [self.CMD, "destroy"]) or
+ (not force and cmd[:2] == [self.CMD, "shutdown"])):
+ self.assertEqual(cmd[2:], [instance_name])
+ output = ""
+ else:
+ self.fail("Unhandled command: %s" % (cmd, ))
+
+ if fail:
+ # Simulate a failing command
+ return self._FailingCommand(cmd)
+ else:
+ return self._SuccessCommand(output, cmd)
+
+ def testStopInstance(self):
+ name = "inst4284.example.com"
+ cfgfile = utils.PathJoin(self.tmpdir, name)
+ cfgdata = "config file content\n"
+
+ for force in [False, True]:
+ for fail in [False, True]:
+ utils.WriteFile(cfgfile, data=cfgdata)
+
+ run_cmd = compat.partial(self._StopInstanceCommand, name, force, fail)
+
+ hv = self._GetHv(run_cmd=run_cmd)
+
+ self.assertTrue(os.path.isfile(cfgfile))
+
+ if fail:
+ try:
+ hv._StopInstance(name, force)
+ except errors.HypervisorError, err:
+ self.assertTrue(str(err).startswith("Failed to stop instance"))
+ else:
+ self.fail("Exception was not raised")
+ self.assertEqual(utils.ReadFile(cfgfile), cfgdata,
+ msg=("Configuration was removed when stopping"
+ " instance failed"))
+ else:
+ hv._StopInstance(name, force)
+ self.assertFalse(os.path.exists(cfgfile))
+
+ def _MigrateNonRunningInstCmd(self, cmd):
+ if cmd == [self.CMD, "list"]:
+ output = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt")
+ else:
+ self.fail("Unhandled command: %s" % (cmd, ))
+
+ return self._SuccessCommand(output, cmd)
+
+ def testMigrateInstanceNotRunning(self):
+ name = "nonexistinginstance.example.com"
+ target = constants.IP4_ADDRESS_LOCALHOST
+ port = 14618
+
+ hv = self._GetHv(run_cmd=self._MigrateNonRunningInstCmd)
+
+ for live in [False, True]:
+ try:
+ hv._MigrateInstance(NotImplemented, name, target, port, live,
+ _ping_fn=NotImplemented)
+ except errors.HypervisorError, err:
+ self.assertEqual(str(err), "Instance not running, cannot migrate")
+ else:
+ self.fail("Exception was not raised")
+
+ def _MigrateInstTargetUnreachCmd(self, cmd):
+ if cmd == [self.CMD, "list"]:
+ output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt")
+ else:
+ self.fail("Unhandled command: %s" % (cmd, ))
+
+ return self._SuccessCommand(output, cmd)
+
+ def testMigrateTargetUnreachable(self):
+ name = "server01.example.com"
+ target = constants.IP4_ADDRESS_LOCALHOST
+ port = 28349
+
+ hv = self._GetHv(run_cmd=self._MigrateInstTargetUnreachCmd)
+
+ for live in [False, True]:
+ if self.CMD == constants.XEN_CMD_XL:
+ # TODO: Detect unreachable targets
+ pass
+ else:
+ try:
+ hv._MigrateInstance(NotImplemented, name, target, port, live,
+ _ping_fn=compat.partial(self._FakeTcpPing,
+ (target, port), False))
+ except errors.HypervisorError, err:
+ wanted = "Remote host %s not" % target
+ self.assertTrue(str(err).startswith(wanted))
+ else:
+ self.fail("Exception was not raised")
+
+ # Fake for migration: verifies the xm/xl-specific argument shape
+ def _MigrateInstanceCmd(self, cluster_name, instance_name, target, port,
+ live, fail, cmd):
+ if cmd == [self.CMD, "list"]:
+ output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt")
+ elif cmd[:2] == [self.CMD, "migrate"]:
+ if self.CMD == constants.XEN_CMD_XM:
+ args = ["-p", str(port)]
+
+ if live:
+ args.append("-l")
+
+ elif self.CMD == constants.XEN_CMD_XL:
+ args = [
+ "-s", constants.XL_SSH_CMD % cluster_name,
+ "-C", utils.PathJoin(self.tmpdir, instance_name),
+ ]
+
+ else:
+ self.fail("Unknown Xen command '%s'" % self.CMD)
+
+ args.extend([instance_name, target])
+ self.assertEqual(cmd[2:], args)
+
+ if fail:
+ return self._FailingCommand(cmd)
+
+ output = ""
+ else:
+ self.fail("Unhandled command: %s" % (cmd, ))
+
+ return self._SuccessCommand(output, cmd)
+
+ def testMigrateInstance(self):
+ clustername = "cluster.example.com"
+ instname = "server01.example.com"
+ target = constants.IP4_ADDRESS_LOCALHOST
+ port = 22364
+
+ for live in [False, True]:
+ for fail in [False, True]:
+ ping_fn = \
+ testutils.CallCounter(compat.partial(self._FakeTcpPing,
+ (target, port), True))
+
+ run_cmd = \
+ compat.partial(self._MigrateInstanceCmd,
+ clustername, instname, target, port, live,
+ fail)
+
+ hv = self._GetHv(run_cmd=run_cmd)
+
+ if fail:
+ try:
+ hv._MigrateInstance(clustername, instname, target, port, live,
+ _ping_fn=ping_fn)
+ except errors.HypervisorError, err:
+ self.assertTrue(str(err).startswith("Failed to migrate instance"))
+ else:
+ self.fail("Exception was not raised")
+ else:
+ hv._MigrateInstance(clustername, instname, target, port, live,
+ _ping_fn=ping_fn)
+
+ # Only "xm" pings the target before migrating
+ if self.CMD == constants.XEN_CMD_XM:
+ expected_pings = 1
+ else:
+ expected_pings = 0
+
+ self.assertEqual(ping_fn.Count(), expected_pings)
+
+
+def _MakeTestClass(cls, cmd):
+ """Makes a class for testing.
+
+ The returned class has structure as shown in the following pseudo code:
+
+ class Test{cls.__name__}{cmd}(_TestXenHypervisor, unittest.TestCase):
+ TARGET = {cls}
+ CMD = {cmd}
+ HVNAME = {Hypervisor name retrieved using class}
+
+ @type cls: class
+ @param cls: Hypervisor class to be tested
+ @type cmd: string
+ @param cmd: Hypervisor command
+ @rtype: tuple
+ @return: Class name and class object (not instance)
+
+ """
+ name = "Test%sCmd%s" % (cls.__name__, cmd.title())
+ # _TestXenHypervisor must come first so its setUp/tearDown take precedence
+ bases = (_TestXenHypervisor, unittest.TestCase)
+ hvname = HVCLASS_TO_HVNAME[cls]
+
+ return (name, type(name, bases, dict(TARGET=cls, CMD=cmd, HVNAME=hvname)))
+
+
+# Create test classes programmatically instead of manually to reduce the risk
+# of forgetting some combinations; one class per (hypervisor, xen command)
+# pair is injected into the module namespace so unittest discovers them
+for cls in [hv_xen.XenPvmHypervisor, hv_xen.XenHvmHypervisor]:
+ for cmd in constants.KNOWN_XEN_COMMANDS:
+ (name, testcls) = _MakeTestClass(cls, cmd)
+
+ # Guard against generating two classes with the same name
+ assert name not in locals()
+
+ locals()[name] = testcls
+
+
if __name__ == "__main__":
testutils.GanetiTestProgram()
self.assertEqual(self.fake_cl.primary_hypervisor, constants.HT_CHROOT)
+class TestClusterObjectTcpUdpPortPool(unittest.TestCase):
+ # Round-trip tests for objects.Cluster.tcpudp_port_pool: it is serialized
+ # as a list but deserialized into a set
+ def testNewCluster(self):
+ self.assertTrue(objects.Cluster().tcpudp_port_pool is None)
+
+ def testSerializingEmpty(self):
+ self.assertEqual(objects.Cluster().ToDict(), {
+ "tcpudp_port_pool": [],
+ })
+
+ def testSerializing(self):
+ cluster = objects.Cluster.FromDict({})
+ self.assertEqual(cluster.tcpudp_port_pool, set())
+
+ cluster.tcpudp_port_pool.add(3546)
+ cluster.tcpudp_port_pool.add(62511)
+
+ data = cluster.ToDict()
+ self.assertEqual(data.keys(), ["tcpudp_port_pool"])
+ # Set ordering is undefined, hence the sorted comparison
+ self.assertEqual(sorted(data["tcpudp_port_pool"]), sorted([3546, 62511]))
+
+ def testDeserializingEmpty(self):
+ cluster = objects.Cluster.FromDict({})
+ self.assertEqual(cluster.tcpudp_port_pool, set())
+
+ def testDeserialize(self):
+ cluster = objects.Cluster.FromDict({
+ "tcpudp_port_pool": [26214, 10039, 267],
+ })
+ self.assertEqual(cluster.tcpudp_port_pool, set([26214, 10039, 267]))
+
+
class TestOS(unittest.TestCase):
ALL_DATA = [
"debootstrap",
slotted = AutoSlotted()
self.assertEqual(slotted.__slots__, AutoSlotted.SLOTS)
+
+class TestContainerToDicts(unittest.TestCase):
+ # Tests for outils.ContainerToDicts: dicts stay dicts, sequences become
+ # lists, anything else raises TypeError
+ def testUnknownType(self):
+ for value in [None, 19410, "xyz"]:
+ try:
+ outils.ContainerToDicts(value)
+ except TypeError, err:
+ self.assertTrue(str(err).startswith("Unknown container type"))
+ else:
+ self.fail("Exception was not raised")
+
+ def testEmptyDict(self):
+ value = {}
+ self.assertFalse(type(value) in outils._SEQUENCE_TYPES)
+ self.assertEqual(outils.ContainerToDicts(value), {})
+
+ def testEmptySequences(self):
+ for cls in [list, tuple, set, frozenset]:
+ self.assertEqual(outils.ContainerToDicts(cls()), [])
+
+
+class _FakeWithFromDict:
+ # Minimal stand-in providing a FromDict method for tests that must never
+ # actually invoke it (empty containers only)
+ def FromDict(self, _):
+ # NotImplementedError, not the NotImplemented singleton: raising the
+ # latter is a TypeError ("exceptions must derive from BaseException")
+ # and would mask the real failure if this fake were ever called
+ raise NotImplementedError
+
+
+class TestContainerFromDicts(unittest.TestCase):
+ # Tests for outils.ContainerFromDicts: the container argument must be one
+ # of the supported types, passed as the type itself (not an instance)
+ def testUnknownType(self):
+ for cls in [str, int, bool]:
+ try:
+ outils.ContainerFromDicts(None, cls, NotImplemented)
+ except TypeError, err:
+ self.assertTrue(str(err).startswith("Unknown container type"))
+ else:
+ self.fail("Exception was not raised")
+
+ # Passing an instance instead of the type is a separate error
+ try:
+ outils.ContainerFromDicts(None, cls(), NotImplemented)
+ except TypeError, err:
+ self.assertTrue(str(err).endswith("is not a type"))
+ else:
+ self.fail("Exception was not raised")
+
+ def testEmptyDict(self):
+ value = {}
+ self.assertFalse(type(value) in outils._SEQUENCE_TYPES)
+ self.assertEqual(outils.ContainerFromDicts(value, dict,
+ NotImplemented),
+ {})
+
+ def testEmptySequences(self):
+ for cls in [list, tuple, set, frozenset]:
+ self.assertEqual(outils.ContainerFromDicts([], cls,
+ _FakeWithFromDict),
+ cls())
+
+
if __name__ == "__main__":
testutils.GanetiTestProgram()
class TestMakeNodeRoot(unittest.TestCase):
 def test(self):
+ # _MakeNodeRoot was made public as MakeNodeRoot; "/" stays rejected and
+ # trailing slashes on the base directory are still normalized away
- self.assertRaises(RuntimeError, vcluster._MakeNodeRoot, "/tmp", "/")
+ self.assertRaises(RuntimeError, vcluster.MakeNodeRoot, "/tmp", "/")
 for i in ["/tmp", "/tmp/", "/tmp///"]:
- self.assertEqual(vcluster._MakeNodeRoot(i, "other.example.com"),
+ self.assertEqual(vcluster.MakeNodeRoot(i, "other.example.com"),
 "/tmp/other.example.com")
#!/usr/bin/python
#
-# Copyright (C) 2012 Google Inc.
+# Copyright (C) 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
"""Script for testing qa.qa_config"""
import unittest
+import tempfile
+import shutil
+import os
+import operator
+
+from ganeti import utils
+from ganeti import serializer
+from ganeti import constants
+from ganeti import compat
from qa import qa_config
+from qa import qa_error
import testutils
}))
+class TestQaConfigLoad(unittest.TestCase):
+ # Tests for qa_config._QaConfig.Load: missing files and each validation
+ # failure in turn, ending with a minimal accepted configuration
+ def setUp(self):
+ self.tmpdir = tempfile.mkdtemp()
+
+ def tearDown(self):
+ shutil.rmtree(self.tmpdir)
+
+ def testLoadNonExistent(self):
+ filename = utils.PathJoin(self.tmpdir, "does.not.exist")
+ self.assertRaises(EnvironmentError, qa_config._QaConfig.Load, filename)
+
+ @staticmethod
+ def _WriteConfig(filename, data):
+ utils.WriteFile(filename, data=serializer.DumpJson(data))
+
+ def _CheckLoadError(self, filename, data, expected):
+ self._WriteConfig(filename, data)
+
+ try:
+ qa_config._QaConfig.Load(filename)
+ except qa_error.Error, err:
+ self.assertTrue(str(err).startswith(expected))
+ else:
+ self.fail("Exception was not raised")
+
+ def testFailsValidation(self):
+ filename = utils.PathJoin(self.tmpdir, "qa.json")
+ testconfig = {}
+
+ # NB: partial binds testconfig by reference, so mutations below are
+ # visible to every subsequent check_fn call
+ check_fn = compat.partial(self._CheckLoadError, filename, testconfig)
+
+ # No nodes
+ check_fn("Need at least one node")
+
+ testconfig["nodes"] = [
+ {
+ "primary": "xen-test-0",
+ "secondary": "192.0.2.1",
+ },
+ ]
+
+ # No instances
+ check_fn("Need at least one instance")
+
+ testconfig["instances"] = [
+ {
+ "name": "xen-test-inst1",
+ },
+ ]
+
+ # Missing "disk" and "disk-growth"
+ check_fn("Config options 'disk' and 'disk-growth' ")
+
+ testconfig["disk"] = []
+ testconfig["disk-growth"] = testconfig["disk"]
+
+ # Minimal accepted configuration
+ self._WriteConfig(filename, testconfig)
+ result = qa_config._QaConfig.Load(filename)
+ self.assertTrue(result.get("nodes"))
+
+ # Non-existent instance check script
+ testconfig[qa_config._INSTANCE_CHECK_KEY] = \
+ utils.PathJoin(self.tmpdir, "instcheck")
+ check_fn("Can't find instance check script")
+ del testconfig[qa_config._INSTANCE_CHECK_KEY]
+
+ # No enabled hypervisor
+ testconfig[qa_config._ENABLED_HV_KEY] = None
+ check_fn("No hypervisor is enabled")
+
+ # Unknown hypervisor
+ testconfig[qa_config._ENABLED_HV_KEY] = ["#unknownhv#"]
+ check_fn("Unknown hypervisor(s) enabled:")
+
+
+class TestQaConfigWithSampleConfig(unittest.TestCase):
+ """Tests using C{qa-sample.json}.
+
+ This test case serves two purposes:
+
+ - Ensure shipped C{qa-sample.json} file is considered a valid QA
+ configuration
+ - Test some functions of L{qa_config._QaConfig} without having to
+ mock a whole configuration file
+
+ """
+ def setUp(self):
+ filename = "%s/qa/qa-sample.json" % testutils.GetSourceDir()
+
+ self.config = qa_config._QaConfig.Load(filename)
+
+ def testGetEnabledHypervisors(self):
+ self.assertEqual(self.config.GetEnabledHypervisors(),
+ [constants.DEFAULT_ENABLED_HYPERVISOR])
+
+ def testGetDefaultHypervisor(self):
+ self.assertEqual(self.config.GetDefaultHypervisor(),
+ constants.DEFAULT_ENABLED_HYPERVISOR)
+
+ def testGetInstanceCheckScript(self):
+ self.assertTrue(self.config.GetInstanceCheckScript() is None)
+
+ def testGetAndGetItem(self):
+ # __getitem__ and get() must agree for existing keys
+ self.assertEqual(self.config["nodes"], self.config.get("nodes"))
+
+ def testGetMasterNode(self):
+ # By convention the first configured node is the master
+ self.assertEqual(self.config.GetMasterNode(), self.config["nodes"][0])
+
+
+class TestQaConfig(unittest.TestCase):
+ # Tests using the minimal nodes+instances-only fixture: instance/node
+ # object conversion, acquire/release bookkeeping and exhaustion errors
+ def setUp(self):
+ filename = \
+ testutils.TestDataFilename("qa-minimal-nodes-instances-only.json")
+
+ self.config = qa_config._QaConfig.Load(filename)
+
+ def testExclusiveStorage(self):
+ # Reading the flag before it was ever set is a programming error
+ self.assertRaises(AssertionError, self.config.GetExclusiveStorage)
+
+ for value in [False, True, 0, 1, 30804, ""]:
+ self.config.SetExclusiveStorage(value)
+ self.assertEqual(self.config.GetExclusiveStorage(), bool(value))
+
+ for template in constants.DISK_TEMPLATES:
+ if value and template not in constants.DTS_EXCL_STORAGE:
+ self.assertFalse(self.config.IsTemplateSupported(template))
+ else:
+ self.assertTrue(self.config.IsTemplateSupported(template))
+
+ def testInstanceConversion(self):
+ self.assertTrue(isinstance(self.config["instances"][0],
+ qa_config._QaInstance))
+
+ def testNodeConversion(self):
+ self.assertTrue(isinstance(self.config["nodes"][0],
+ qa_config._QaNode))
+
+ def testAcquireAndReleaseInstance(self):
+ self.assertFalse(compat.any(map(operator.attrgetter("used"),
+ self.config["instances"])))
+
+ inst = qa_config.AcquireInstance(_cfg=self.config)
+ self.assertTrue(inst.used)
+ self.assertTrue(inst.disk_template is None)
+
+ inst.Release()
+
+ # Releasing must fully reset the instance's state
+ self.assertFalse(inst.used)
+ self.assertTrue(inst.disk_template is None)
+
+ self.assertFalse(compat.any(map(operator.attrgetter("used"),
+ self.config["instances"])))
+
+ def testAcquireInstanceTooMany(self):
+ # Acquire all instances
+ for _ in range(len(self.config["instances"])):
+ inst = qa_config.AcquireInstance(_cfg=self.config)
+ self.assertTrue(inst.used)
+ self.assertTrue(inst.disk_template is None)
+
+ # The next acquisition must fail
+ self.assertRaises(qa_error.OutOfInstancesError,
+ qa_config.AcquireInstance, _cfg=self.config)
+
+ def testAcquireNodeNoneAdded(self):
+ self.assertFalse(compat.any(map(operator.attrgetter("added"),
+ self.config["nodes"])))
+
+ # First call must return master node
+ node = qa_config.AcquireNode(_cfg=self.config)
+ self.assertEqual(node, self.config.GetMasterNode())
+
+ # Next call with exclusion list fails
+ self.assertRaises(qa_error.OutOfNodesError, qa_config.AcquireNode,
+ exclude=[node], _cfg=self.config)
+
+ def testAcquireNodeTooMany(self):
+ # Mark all nodes as marked (master excluded)
+ for node in self.config["nodes"]:
+ if node != self.config.GetMasterNode():
+ node.MarkAdded()
+
+ nodecount = len(self.config["nodes"])
+
+ self.assertTrue(nodecount > 1)
+
+ acquired = []
+
+ for _ in range(nodecount):
+ node = qa_config.AcquireNode(exclude=acquired, _cfg=self.config)
+ if node == self.config.GetMasterNode():
+ self.assertFalse(node.added)
+ else:
+ self.assertTrue(node.added)
+ self.assertEqual(node.use_count, 1)
+ acquired.append(node)
+
+ self.assertRaises(qa_error.OutOfNodesError, qa_config.AcquireNode,
+ exclude=acquired, _cfg=self.config)
+
+
if __name__ == "__main__":
testutils.GanetiTestProgram()