Merge branch 'devel-2.7'
author Iustin Pop <iustin@google.com>
Fri, 15 Feb 2013 11:53:30 +0000 (12:53 +0100)
committer Iustin Pop <iustin@google.com>
Fri, 15 Feb 2013 13:04:13 +0000 (14:04 +0100)
* devel-2.7:
  Rename lib/objectutils to outils.py
  Fix typo in gnt-group manpage
  Fix wrong type in a docstring of the RAPI subsystem
  Finish the remote→restricted commands rename
  Enable use of the priority option in hbal
  Add CLI-level option to override the priority
  Add functions to parse CLI-level format of priorities
  Add a function to change an OpCode's priority
  Make hbal opcode annotation more generic
  Add unit tests for RADOSBlockDevice
  Fix rbd showmapped output parsing
  Change default xen root path to /dev/xvda1
  Remove check for conflicts from NetworkDisconnect
  If _UnlockedLookupNetwork() fails, raise an error
  Force conflicts check in LUNetworkDisconnect

Also updated objects.py with more outils renames.

Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Guido Trotter <ultrotter@google.com>

57 files changed:
Makefile.am
doc/design-draft.rst
doc/design-hroller.rst [new file with mode: 0644]
lib/client/gnt_network.py
lib/client/gnt_node.py
lib/constants.py
lib/hypervisor/hv_kvm.py
lib/hypervisor/hv_xen.py
lib/network.py
lib/objects.py
lib/opcodes.py
lib/outils.py
lib/rapi/rlib2.py
lib/utils/wrapper.py
lib/vcluster.py
qa/ganeti-qa.py
qa/qa-sample.json
qa/qa_cluster.py
qa/qa_config.py
qa/qa_daemon.py
qa/qa_env.py
qa/qa_group.py
qa/qa_instance.py
qa/qa_network.py [new file with mode: 0644]
qa/qa_node.py
qa/qa_os.py
qa/qa_rapi.py
qa/qa_tags.py
qa/qa_utils.py
src/Ganeti/Common.hs
src/Ganeti/Config.hs
src/Ganeti/HTools/CLI.hs
src/Ganeti/HTools/Program/Harep.hs [new file with mode: 0644]
src/Ganeti/HTools/Program/Main.hs
src/Ganeti/HTools/Types.hs
src/Ganeti/Jobs.hs
src/Ganeti/Objects.hs
src/Ganeti/Query/Common.hs
src/Ganeti/Query/Language.hs
src/Ganeti/Query/Network.hs [new file with mode: 0644]
src/Ganeti/Query/Node.hs
src/Ganeti/Query/Query.hs
src/Ganeti/Types.hs
src/Ganeti/Utils.hs
test/data/qa-minimal-nodes-instances-only.json [new file with mode: 0644]
test/data/xen-xm-info-4.0.1.txt [new file with mode: 0644]
test/data/xen-xm-list-4.0.1-dom0-only.txt [new file with mode: 0644]
test/data/xen-xm-list-4.0.1-four-instances.txt [new file with mode: 0644]
test/hs/Test/Ganeti/Common.hs
test/hs/Test/Ganeti/Objects.hs
test/py/ganeti.constants_unittest.py
test/py/ganeti.hypervisor.hv_kvm_unittest.py
test/py/ganeti.hypervisor.hv_xen_unittest.py
test/py/ganeti.objects_unittest.py
test/py/ganeti.outils_unittest.py
test/py/ganeti.vcluster_unittest.py
test/py/qa.qa_config_unittest.py

index 5831bb9..fc3a522 100644 (file)
@@ -401,6 +401,7 @@ docinput = \
        doc/design-monitoring-agent.rst \
        doc/design-virtual-clusters.rst \
        doc/design-x509-ca.rst \
+       doc/design-hroller.rst \
        doc/devnotes.rst \
        doc/glossary.rst \
        doc/hooks.rst \
@@ -437,7 +438,7 @@ HS_COMPILE_PROGS= \
 # All Haskell non-test programs to be compiled but not automatically installed
 HS_PROGS = $(HS_BIN_PROGS) $(HS_MYEXECLIB_PROGS)
 
-HS_BIN_ROLES = hbal hscan hspace hinfo hcheck hroller
+HS_BIN_ROLES = harep hbal hscan hspace hinfo hcheck hroller
 HS_HTOOLS_PROGS = $(HS_BIN_ROLES) hail
 
 HS_ALL_PROGS = \
@@ -506,6 +507,7 @@ HS_LIB_SRCS = \
        src/Ganeti/HTools/Node.hs \
        src/Ganeti/HTools/PeerMap.hs \
        src/Ganeti/HTools/Program/Hail.hs \
+       src/Ganeti/HTools/Program/Harep.hs \
        src/Ganeti/HTools/Program/Hbal.hs \
        src/Ganeti/HTools/Program/Hcheck.hs \
        src/Ganeti/HTools/Program/Hinfo.hs \
@@ -530,6 +532,7 @@ HS_LIB_SRCS = \
        src/Ganeti/Query/Group.hs \
        src/Ganeti/Query/Job.hs \
        src/Ganeti/Query/Language.hs \
+       src/Ganeti/Query/Network.hs \
        src/Ganeti/Query/Node.hs \
        src/Ganeti/Query/Query.hs \
        src/Ganeti/Query/Server.hs \
@@ -1046,12 +1049,16 @@ TEST_FILES = \
        test/data/proc_drbd83_sync.txt \
        test/data/proc_drbd83_sync_want.txt \
        test/data/proc_drbd83_sync_krnl2.6.39.txt \
+       test/data/qa-minimal-nodes-instances-only.json \
        test/data/sys_drbd_usermode_helper.txt \
        test/data/vgreduce-removemissing-2.02.02.txt \
        test/data/vgreduce-removemissing-2.02.66-fail.txt \
        test/data/vgreduce-removemissing-2.02.66-ok.txt \
        test/data/vgs-missing-pvs-2.02.02.txt \
        test/data/vgs-missing-pvs-2.02.66.txt \
+       test/data/xen-xm-info-4.0.1.txt \
+       test/data/xen-xm-list-4.0.1-dom0-only.txt \
+       test/data/xen-xm-list-4.0.1-four-instances.txt \
        test/py/ganeti-cli.test \
        test/py/gnt-cli.test \
        test/py/import-export_unittest-helper
index 581a2d4..c18469b 100644 (file)
@@ -15,6 +15,7 @@ Design document drafts
    design-autorepair.rst
    design-partitioned.rst
    design-monitoring-agent.rst
+   design-hroller.rst
 
 .. vim: set textwidth=72 :
 .. Local Variables:
diff --git a/doc/design-hroller.rst b/doc/design-hroller.rst
new file mode 100644 (file)
index 0000000..632531b
--- /dev/null
@@ -0,0 +1,154 @@
+============
+HRoller tool
+============
+
+.. contents:: :depth: 4
+
+This is a design document detailing the cluster maintenance scheduler,
+HRoller.
+
+
+Current state and shortcomings
+==============================
+
+To enable automating cluster-wide reboots, a new htool called HRoller
+was added to Ganeti starting with version 2.7. This tool helps
+parallelize offline cluster maintenance by calculating which nodes are
+not both primary and secondary for any DRBD instance, and thus can be
+rebooted at the same time, when all instances are down.
+
+The way this is done is documented in the :manpage:`hroller(1)` manpage.
+
+We would now like to perform online maintenance on the cluster by
+rebooting nodes after evacuating their primary instances (rolling
+reboots).
+
+Proposed changes
+================
+
+
+Calculating rolling maintenances
+--------------------------------
+
+In order to perform rolling maintenance we need to migrate instances off
+the nodes before a reboot. How this can be done depends on the
+instance's disk template and status:
+
+Down instances
+++++++++++++++
+
+If an instance was shut down when the maintenance started, it will be
+ignored. This avoids needlessly moving its primary around, since it
+won't suffer any downtime anyway.
+
+
+DRBD
+++++
+
+Each node must migrate all instances off to their secondaries, and then
+it can either be rebooted, or the secondaries can be evacuated as well.
+
+Since currently doing a ``replace-disks`` on DRBD breaks redundancy,
+it's not any safer than temporarily rebooting a node with secondaries on
+it (citation needed). As such, for now we'll implement just the
+"migrate+reboot" mode, and focus on ``replace-disks`` later.
+
+In order to do that we can use the following algorithm:
+
+1) Compute node sets that don't contain both the primary and the
+secondary of any instance. This can already be done by the current
+hroller graph coloring algorithm: nodes are in the same set (color) if
+and only if no edge (instance) exists between them (see the
+:manpage:`hroller(1)` manpage for more details).
+2) Inside each node set, calculate subsets that don't have any secondary
+node in common (this can be done by creating a graph of nodes that are
+connected if and only if an instance on both has the same secondary
+node, and coloring that graph).
+3) It is then possible to migrate all nodes in a subset created at step
+2 in parallel, reboot/perform maintenance on them, and migrate their
+original primaries back. This allows the computation above to be reused
+for each following subset without triggering N+1 failures, if none were
+present before. See below about the actual execution of the
+maintenance.
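+
+As an illustration only, a minimal sketch of steps 1 and 2 could look
+like the following (the ``(primary, secondary)`` instance representation
+and all helper names are assumptions for this sketch, not the actual
+hroller code)::
+
+  import collections
+
+  def greedy_coloring(nodes, edges):
+      """Assign colors such that no two adjacent nodes share one."""
+      color_of = {}
+      for node in nodes:
+          taken = set(color_of.get(peer) for peer in edges[node])
+          color = 0
+          while color in taken:
+              color += 1
+          color_of[node] = color
+      groups = collections.defaultdict(list)
+      for node, color in color_of.items():
+          groups[color].append(node)
+      return list(groups.values())
+
+  def reboot_subsets(nodes, instances):
+      """Yield node subsets; instances are (primary, secondary) pairs."""
+      # Step 1: two nodes are adjacent if an instance connects them
+      edges = collections.defaultdict(set)
+      for pri, sec in instances:
+          edges[pri].add(sec)
+          edges[sec].add(pri)
+      for node_set in greedy_coloring(nodes, edges):
+          # Step 2: within a set, nodes are adjacent if instances on
+          # them share a secondary node
+          secondaries = dict((n, set(s for (p, s) in instances if p == n))
+                             for n in node_set)
+          subset_edges = collections.defaultdict(set)
+          for a in node_set:
+              for b in node_set:
+                  if a != b and secondaries[a] & secondaries[b]:
+                      subset_edges[a].add(b)
+          for subset in greedy_coloring(node_set, subset_edges):
+              yield subset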
+
+Non-DRBD
+++++++++
+
+All non-DRBD disk templates that can be migrated have no "secondary"
+concept. As such, instances can be migrated to any node (in the same
+nodegroup). In order to do the job we can either:
+
+- Perform migrations on one node at a time, perform the maintenance on
+  that node, and proceed (the node will then be targeted again to host
+  instances automatically, as hail chooses targets for the instances
+  among all nodes in a group). Nodes in different nodegroups can be
+  handled in parallel.
+- Perform migrations on one node at a time, but without waiting for the
+  first node to come back before proceeding. This allows us to continue,
+  shrinking the usable cluster, until no more capacity is available in
+  the nodegroup, at which point we have to wait for some nodes to come
+  back so that capacity is available again for the last few nodes.
+- Pre-calculate sets of nodes that can be migrated together (probably
+  with a greedy algorithm) and parallelize between them, using the
+  migrate-back approach discussed for DRBD to perform the calculation
+  only once (a sketch of this option follows below).
+
+Note that for non-DRBD disks that still use local storage (e.g. RBD and
+plain) redundancy might break anyway, and nothing except the first
+algorithm might be safe. This would perhaps be a good reason to consider
+managing RBD pools better, if those are implemented on top of node
+storage rather than on dedicated storage machines.
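+
+As an illustration of the third option, a hypothetical greedy
+pre-calculation could pack nodes into sets as long as the remaining
+nodes can absorb the displaced instances (the simple slot-based
+capacity model and all names below are assumptions, not actual Ganeti
+code)::
+
+  def greedy_maintenance_sets(node_load, capacity):
+      """node_load: node -> primary instance count;
+      capacity: node -> spare instance slots.
+      Returns node sets that can be drained in parallel."""
+      todo = sorted(node_load, key=node_load.get, reverse=True)
+      sets = []
+      while todo:
+          current = []
+          displaced = 0
+          spare = sum(capacity[n] for n in todo)
+          for node in list(todo):
+              need = displaced + node_load[node]
+              # keep the node only if the remaining nodes can host its
+              # instances on top of those already displaced
+              if need <= spare - capacity[node]:
+                  current.append(node)
+                  displaced = need
+                  spare -= capacity[node]
+                  todo.remove(node)
+          if not current:
+              raise ValueError("not enough spare capacity in nodegroup")
+          sets.append(current)
+      return sets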
+
+Executing rolling maintenances
+------------------------------
+
+Hroller accepts commands to run in order to perform maintenance
+automatically. These are run on the machine hroller runs on, and take a
+node name as input. They then have to gain access to the target node
+(via ssh, restricted commands, or some other means) and perform their
+duty.
+
+1) A command (--check-cmd) will be called on all selected online nodes
+to check whether a node needs maintenance. Hroller will proceed only on
+nodes that respond positively to this invocation.
+FIXME: decide about -D
+2) Hroller will evacuate all primary instances from the node.
+3) A command (--maint-cmd) will be called on a node to do the actual
+maintenance operation. It should do any operation needed to perform the
+maintenance, including triggering the actual reboot.
+4) A command (--verify-cmd) will be called to check that the operation
+was successful; it has to wait until the target node is back up (and
+decide after how long it should give up) and perform the verification.
+If it is not successful, hroller will stop and not proceed with other
+nodes.
+5) The master node will be kept last, but will not otherwise be treated
+specially. If hroller was running on the master node, care must be
+exercised, as its maintenance will have interrupted the software itself,
+and as such the verification step will not happen. This will not be
+handled automatically in the first version. An additional flag to simply
+skip the master node will be available as well, in case that's
+preferred.
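+
+A rough, hypothetical sketch of the per-node driver loop follows; the
+option names are the ones listed above, while the transport (plain
+``subprocess`` instead of ssh or restricted commands) and the helper
+names are illustrative assumptions only::
+
+  import subprocess
+
+  def evacuate_primaries(node):
+      """Placeholder for step 2: migrate primary instances off node."""
+      pass
+
+  def maintain_node(node, check_cmd, maint_cmd, verify_cmd):
+      """Run one maintenance cycle for a single online node."""
+      # 1) proceed only if the node reports needing maintenance
+      if subprocess.call([check_cmd, node]) != 0:
+          return False
+      # 2) evacuate all primary instances
+      evacuate_primaries(node)
+      # 3) perform the maintenance, including the reboot itself
+      subprocess.check_call([maint_cmd, node])
+      # 4) wait for the node to come back and verify; failure here
+      #    must abort the whole rolling maintenance
+      if subprocess.call([verify_cmd, node]) != 0:
+          raise RuntimeError("verification failed on %s" % node)
+      return True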
+
+
+Future work
+===========
+
+The ``replace-disks`` functionality for DRBD nodes should be
+implemented. Note that once we support a DRBD version that allows
+multiple secondaries, this can be done safely, without losing
+replication at any time, by adding a temporary secondary and dropping
+the previous one only when the sync is finished.
+
+If/when RBD pools can be managed inside Ganeti, care can be taken to
+also evacuate the pool from a node before it's put into maintenance.
+This is equivalent to evacuating DRBD secondaries.
+
+Master failovers during the maintenance should be performed by hroller.
+This requires RPC/RAPI support for master failover. Hroller should also
+be modified to better support running on the master itself and
+continuing on the new master.
+
+.. vim: set textwidth=72 :
+.. Local Variables:
+.. mode: rst
+.. fill-column: 72
+.. End:
index b2d6360..17c1bac 100644 (file)
@@ -167,9 +167,11 @@ def ListNetworks(opts, args):
     "tags": (",".join, False),
     }
 
+  cl = GetClient()
   return GenericList(constants.QR_NETWORK, desired_fields, args, None,
                      opts.separator, not opts.no_headers,
-                     verbose=opts.verbose, format_override=fmtoverride)
+                     verbose=opts.verbose, format_override=fmtoverride,
+                     cl=cl)
 
 
 def ListNetworkFields(opts, args):
index e15a6d8..e893b5e 100644 (file)
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -303,7 +303,7 @@ def ListNodes(opts, args):
   fmtoverride = dict.fromkeys(["pinst_list", "sinst_list", "tags"],
                               (",".join, False))
 
-  cl = GetClient(query=False)
+  cl = GetClient(query=True)
 
   return GenericList(constants.QR_NODE, selected_fields, args, opts.units,
                      opts.separator, not opts.no_headers,
@@ -529,8 +529,6 @@ def ShowNodeConfig(opts, args):
   @return: the desired exit code
 
   """
-  # note: if this starts using RPC fields, and we haven't yet fixed
-  # hconfd, then we should revert to query=False
   cl = GetClient(query=True)
   result = cl.QueryNodes(fields=["name", "pip", "sip",
                                  "pinst_list", "sinst_list",
index 884eb6b..0e4f471 100644 (file)
@@ -201,6 +201,12 @@ XEN_CMD_XM = "xm"
 XEN_CMD_XL = "xl"
 # FIXME: This will be made configurable using hvparams in Ganeti 2.7
 XEN_CMD = _autoconf.XEN_CMD
+
+KNOWN_XEN_COMMANDS = compat.UniqueFrozenset([
+  XEN_CMD_XM,
+  XEN_CMD_XL,
+  ])
+
 # When the Xen toolstack used is "xl", live migration requires the source host
 # to connect to the target host via ssh (xl runs this command). We need to pass
 # the command xl runs some extra info so that it can use Ganeti's key
index d0f5a00..25e43b7 100644 (file)
@@ -80,7 +80,24 @@ _SPICE_ADDITIONAL_PARAMS = frozenset([
   ])
 
 
-def _ProbeTapVnetHdr(fd):
+def _GetTunFeatures(fd, _ioctl=fcntl.ioctl):
+  """Retrieves supported TUN features from file descriptor.
+
+  @see: L{_ProbeTapVnetHdr}
+
+  """
+  req = struct.pack("I", 0)
+  try:
+    buf = _ioctl(fd, TUNGETFEATURES, req)
+  except EnvironmentError, err:
+    logging.warning("ioctl(TUNGETFEATURES) failed: %s", err)
+    return None
+  else:
+    (flags, ) = struct.unpack("I", buf)
+    return flags
+
+
+def _ProbeTapVnetHdr(fd, _features_fn=_GetTunFeatures):
   """Check whether to enable the IFF_VNET_HDR flag.
 
   To do this, _all_ of the following conditions must be met:
@@ -97,20 +114,19 @@ def _ProbeTapVnetHdr(fd):
    @param fd: the file descriptor of /dev/net/tun
 
   """
-  req = struct.pack("I", 0)
-  try:
-    res = fcntl.ioctl(fd, TUNGETFEATURES, req)
-  except EnvironmentError:
-    logging.warning("TUNGETFEATURES ioctl() not implemented")
-    return False
+  flags = _features_fn(fd)
 
-  tunflags = struct.unpack("I", res)[0]
-  if tunflags & IFF_VNET_HDR:
-    return True
-  else:
-    logging.warning("Host does not support IFF_VNET_HDR, not enabling")
+  if flags is None:
+    # Not supported
     return False
 
+  result = bool(flags & IFF_VNET_HDR)
+
+  if not result:
+    logging.warning("Kernel does not support IFF_VNET_HDR, not enabling")
+
+  return result
+
 
 def _OpenTap(vnet_hdr=True):
   """Open a new tap device and return its file descriptor.
@@ -284,7 +300,7 @@ class QmpConnection:
     greeting = self._Recv()
     if not greeting[self._FIRST_MESSAGE_KEY]:
       self._connected = False
-      raise errors.HypervisorError("kvm: qmp communication error (wrong"
+      raise errors.HypervisorError("kvm: QMP communication error (wrong"
                                    " server greeting")
 
     # Let's put the monitor in command mode using the qmp_capabilities
@@ -447,7 +463,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     constants.HV_VNC_BIND_ADDRESS:
       (False, lambda x: (netutils.IP4Address.IsValid(x) or
                          utils.IsNormAbsPath(x)),
-       "the VNC bind address must be either a valid IP address or an absolute"
+       "The VNC bind address must be either a valid IP address or an absolute"
        " pathname", None, None),
     constants.HV_VNC_TLS: hv_base.NO_CHECK,
     constants.HV_VNC_X509: hv_base.OPT_DIR_CHECK,
@@ -457,7 +473,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     constants.HV_KVM_SPICE_IP_VERSION:
       (False, lambda x: (x == constants.IFACE_NO_IP_VERSION_SPECIFIED or
                          x in constants.VALID_IP_VERSIONS),
-       "the SPICE IP version should be 4 or 6",
+       "The SPICE IP version should be 4 or 6",
        None, None),
     constants.HV_KVM_SPICE_PASSWORD_FILE: hv_base.OPT_FILE_CHECK,
     constants.HV_KVM_SPICE_LOSSLESS_IMG_COMPR:
@@ -838,8 +854,8 @@ class KVMHypervisor(hv_base.BaseHypervisor):
 
     result = utils.RunCmd([pathutils.KVM_IFUP, tap], env=env)
     if result.failed:
-      raise errors.HypervisorError("Failed to configure interface %s: %s."
-                                   " Network configuration script output: %s" %
+      raise errors.HypervisorError("Failed to configure interface %s: %s;"
+                                   " network configuration script output: %s" %
                                    (tap, result.fail_reason, result.output))
 
   @staticmethod
@@ -1206,9 +1222,9 @@ class KVMHypervisor(hv_base.BaseHypervisor):
           else:
             vnc_arg = "%s:%d" % (vnc_bind_address, display)
         else:
-          logging.error("Network port is not a valid VNC display (%d < %d)."
-                        " Not starting VNC", instance.network_port,
-                        constants.VNC_BASE_PORT)
+          logging.error("Network port is not a valid VNC display (%d < %d),"
+                        " not starting VNC",
+                        instance.network_port, constants.VNC_BASE_PORT)
           vnc_arg = "none"
 
         # Only allow tls and other option when not binding to a file, for now.
@@ -1244,7 +1260,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
         # have that kind of IP addresses, throw an exception
         if spice_ip_version != constants.IFACE_NO_IP_VERSION_SPECIFIED:
           if not addresses[spice_ip_version]:
-            raise errors.HypervisorError("spice: unable to get an IPv%s address"
+            raise errors.HypervisorError("SPICE: Unable to get an IPv%s address"
                                          " for %s" % (spice_ip_version,
                                                       spice_bind))
 
@@ -1261,7 +1277,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
         elif addresses[constants.IP6_VERSION]:
           spice_ip_version = constants.IP6_VERSION
         else:
-          raise errors.HypervisorError("spice: unable to get an IP address"
+          raise errors.HypervisorError("SPICE: Unable to get an IP address"
                                        " for %s" % (spice_bind))
 
         spice_address = addresses[spice_ip_version][0]
@@ -1316,8 +1332,10 @@ class KVMHypervisor(hv_base.BaseHypervisor):
         # Enable the spice agent communication channel between the host and the
         # agent.
         kvm_cmd.extend(["-device", "virtio-serial-pci"])
-        kvm_cmd.extend(["-device", "virtserialport,chardev=spicechannel0,"
-                                                   "name=com.redhat.spice.0"])
+        kvm_cmd.extend([
+          "-device",
+          "virtserialport,chardev=spicechannel0,name=com.redhat.spice.0",
+          ])
         kvm_cmd.extend(["-chardev", "spicevmc,id=spicechannel0,name=vdagent"])
 
       logging.info("KVM: SPICE will listen on port %s", instance.network_port)
@@ -1649,16 +1667,22 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """Invoke a command on the instance monitor.
 
     """
+    # TODO: Replace monitor calls with QMP once KVM >= 0.14 is the minimum
+    # version. The monitor protocol is designed for human consumption, whereas
+    # QMP is made for programmatic usage. In the worst case QMP can also
+    # execute monitor commands. As it is, all calls to socat take at least
+    # 500ms and likely more: socat can't detect the end of the reply and waits
+    # for 500ms of no data received before exiting (500 ms is the default for
+    # the "-t" parameter).
     socat = ("echo %s | %s STDIO UNIX-CONNECT:%s" %
              (utils.ShellQuote(command),
               constants.SOCAT_PATH,
               utils.ShellQuote(self._InstanceMonitor(instance_name))))
     result = utils.RunCmd(socat)
     if result.failed:
-      msg = ("Failed to send command '%s' to instance %s."
-             " output: %s, error: %s, fail_reason: %s" %
-             (command, instance_name,
-              result.stdout, result.stderr, result.fail_reason))
+      msg = ("Failed to send command '%s' to instance '%s', reason '%s',"
+             " output: %s" %
+             (command, instance_name, result.fail_reason, result.output))
       raise errors.HypervisorError(msg)
 
     return result
@@ -2075,13 +2099,13 @@ class KVMHypervisor(hv_base.BaseHypervisor):
         # IP of that family
         if (netutils.IP4Address.IsValid(spice_bind) and
             spice_ip_version != constants.IP4_VERSION):
-          raise errors.HypervisorError("spice: got an IPv4 address (%s), but"
+          raise errors.HypervisorError("SPICE: Got an IPv4 address (%s), but"
                                        " the specified IP version is %s" %
                                        (spice_bind, spice_ip_version))
 
         if (netutils.IP6Address.IsValid(spice_bind) and
             spice_ip_version != constants.IP6_VERSION):
-          raise errors.HypervisorError("spice: got an IPv6 address (%s), but"
+          raise errors.HypervisorError("SPICE: Got an IPv6 address (%s), but"
                                        " the specified IP version is %s" %
                                        (spice_bind, spice_ip_version))
     else:
@@ -2089,7 +2113,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
       # error if any of them is set without it.
       for param in _SPICE_ADDITIONAL_PARAMS:
         if hvparams[param]:
-          raise errors.HypervisorError("spice: %s requires %s to be set" %
+          raise errors.HypervisorError("SPICE: %s requires %s to be set" %
                                        (param, constants.HV_KVM_SPICE_BIND))
 
   @classmethod
@@ -2118,21 +2142,21 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     if spice_bind:
       # only one of VNC and SPICE can be used currently.
       if hvparams[constants.HV_VNC_BIND_ADDRESS]:
-        raise errors.HypervisorError("both SPICE and VNC are configured, but"
+        raise errors.HypervisorError("Both SPICE and VNC are configured, but"
                                      " only one of them can be used at a"
-                                     " given time.")
+                                     " given time")
 
       # check that KVM supports SPICE
       kvmhelp = cls._GetKVMOutput(kvm_path, cls._KVMOPT_HELP)
       if not cls._SPICE_RE.search(kvmhelp):
-        raise errors.HypervisorError("spice is configured, but it is not"
-                                     " supported according to kvm --help")
+        raise errors.HypervisorError("SPICE is configured, but it is not"
+                                     " supported according to 'kvm --help'")
 
       # if spice_bind is not an IP address, it must be a valid interface
-      bound_to_addr = (netutils.IP4Address.IsValid(spice_bind)
-                       or netutils.IP6Address.IsValid(spice_bind))
+      bound_to_addr = (netutils.IP4Address.IsValid(spice_bind) or
+                       netutils.IP6Address.IsValid(spice_bind))
       if not bound_to_addr and not netutils.IsValidInterface(spice_bind):
-        raise errors.HypervisorError("spice: the %s parameter must be either"
+        raise errors.HypervisorError("SPICE: The %s parameter must be either"
                                      " a valid IP address or interface name" %
                                      constants.HV_KVM_SPICE_BIND)
 
index 626994f..8789571 100644 (file)
@@ -24,6 +24,7 @@
 """
 
 import logging
+import string # pylint: disable=W0402
 from cStringIO import StringIO
 
 from ganeti import constants
@@ -41,6 +42,12 @@ XL_CONFIG_FILE = utils.PathJoin(pathutils.XEN_CONFIG_DIR, "xen/xl.conf")
 VIF_BRIDGE_SCRIPT = utils.PathJoin(pathutils.XEN_CONFIG_DIR,
                                    "scripts/vif-bridge")
 _DOM0_NAME = "Domain-0"
+_DISK_LETTERS = string.ascii_lowercase
+
+_FILE_DRIVER_MAP = {
+  constants.FD_LOOP: "file",
+  constants.FD_BLKTAP: "tap:aio",
+  }
 
 
 def _CreateConfigCpus(cpu_mask):
@@ -75,6 +82,224 @@ def _CreateConfigCpus(cpu_mask):
     return "cpus = [ %s ]" % ", ".join(map(_GetCPUMap, cpu_list))
 
 
+def _RunXmList(fn, xmllist_errors):
+  """Helper function for L{_GetXmList} to run "xm list".
+
+  @type fn: callable
+  @param fn: Function returning result of running C{xm list}
+  @type xmllist_errors: list
+  @param xmllist_errors: Error list
+  @rtype: list
+
+  """
+  result = fn()
+  if result.failed:
+    logging.error("xm list failed (%s): %s", result.fail_reason,
+                  result.output)
+    xmllist_errors.append(result)
+    raise utils.RetryAgain()
+
+  # skip over the heading
+  return result.stdout.splitlines()
+
+
+def _ParseXmList(lines, include_node):
+  """Parses the output of C{xm list}.
+
+  @type lines: list
+  @param lines: Output lines of C{xm list}
+  @type include_node: boolean
+  @param include_node: If True, return information for Dom0
+  @return: list of tuples, each containing (name, id, memory, vcpus,
+    state, time spent)
+
+  """
+  result = []
+
+  # Iterate through all lines while ignoring header
+  for line in lines[1:]:
+    # The format of lines is:
+    # Name      ID Mem(MiB) VCPUs State  Time(s)
+    # Domain-0   0  3418     4 r-----    266.2
+    data = line.split()
+    if len(data) != 6:
+      raise errors.HypervisorError("Can't parse output of xm list,"
+                                   " line: %s" % line)
+    try:
+      data[1] = int(data[1])
+      data[2] = int(data[2])
+      data[3] = int(data[3])
+      data[5] = float(data[5])
+    except (TypeError, ValueError), err:
+      raise errors.HypervisorError("Can't parse output of xm list,"
+                                   " line: %s, error: %s" % (line, err))
+
+    # skip the Domain-0 (optional)
+    if include_node or data[0] != _DOM0_NAME:
+      result.append(data)
+
+  return result
+
+
+def _GetXmList(fn, include_node, _timeout=5):
+  """Return the list of running instances.
+
+  See L{_RunXmList} and L{_ParseXmList} for parameter details.
+
+  """
+  xmllist_errors = []
+  try:
+    lines = utils.Retry(_RunXmList, (0.3, 1.5, 1.0), _timeout,
+                        args=(fn, xmllist_errors))
+  except utils.RetryTimeout:
+    if xmllist_errors:
+      xmlist_result = xmllist_errors.pop()
+
+      errmsg = ("xm list failed, timeout exceeded (%s): %s" %
+                (xmlist_result.fail_reason, xmlist_result.output))
+    else:
+      errmsg = "xm list failed"
+
+    raise errors.HypervisorError(errmsg)
+
+  return _ParseXmList(lines, include_node)
+
+
+def _ParseNodeInfo(info):
+  """Return information about the node.
+
+  @return: a dict with the following keys (memory values in MiB):
+        - memory_total: the total memory size on the node
+        - memory_free: the available memory on the node for instances
+        - nr_cpus: total number of CPUs
+        - nr_nodes: in a NUMA system, the number of domains
+        - nr_sockets: the number of physical CPU sockets in the node
+        - hv_version: the hypervisor version in the form (major, minor)
+
+  """
+  result = {}
+  cores_per_socket = threads_per_core = nr_cpus = None
+  xen_major, xen_minor = None, None
+  memory_total = None
+  memory_free = None
+
+  for line in info.splitlines():
+    fields = line.split(":", 1)
+
+    if len(fields) < 2:
+      continue
+
+    (key, val) = map(lambda s: s.strip(), fields)
+
+    # Note: in Xen 3, memory has changed to total_memory
+    if key in ("memory", "total_memory"):
+      memory_total = int(val)
+    elif key == "free_memory":
+      memory_free = int(val)
+    elif key == "nr_cpus":
+      nr_cpus = result["cpu_total"] = int(val)
+    elif key == "nr_nodes":
+      result["cpu_nodes"] = int(val)
+    elif key == "cores_per_socket":
+      cores_per_socket = int(val)
+    elif key == "threads_per_core":
+      threads_per_core = int(val)
+    elif key == "xen_major":
+      xen_major = int(val)
+    elif key == "xen_minor":
+      xen_minor = int(val)
+
+  if None not in [cores_per_socket, threads_per_core, nr_cpus]:
+    result["cpu_sockets"] = nr_cpus / (cores_per_socket * threads_per_core)
+
+  if memory_free is not None:
+    result["memory_free"] = memory_free
+
+  if memory_total is not None:
+    result["memory_total"] = memory_total
+
+  if not (xen_major is None or xen_minor is None):
+    result[constants.HV_NODEINFO_KEY_VERSION] = (xen_major, xen_minor)
+
+  return result
+
+
+def _MergeInstanceInfo(info, fn):
+  """Updates node information from L{_ParseNodeInfo} with instance info.
+
+  @type info: dict
+  @param info: Result from L{_ParseNodeInfo}
+  @type fn: callable
+  @param fn: Function returning result of running C{xm list}
+  @rtype: dict
+
+  """
+  total_instmem = 0
+
+  for (name, _, mem, vcpus, _, _) in fn(True):
+    if name == _DOM0_NAME:
+      info["memory_dom0"] = mem
+      info["dom0_cpus"] = vcpus
+
+    # Include Dom0 in total memory usage
+    total_instmem += mem
+
+  memory_free = info.get("memory_free")
+  memory_total = info.get("memory_total")
+
+  # Calculate memory used by hypervisor
+  if None not in [memory_total, memory_free, total_instmem]:
+    info["memory_hv"] = memory_total - memory_free - total_instmem
+
+  return info
+
+
+def _GetNodeInfo(info, fn):
+  """Combines L{_MergeInstanceInfo} and L{_ParseNodeInfo}.
+
+  """
+  return _MergeInstanceInfo(_ParseNodeInfo(info), fn)
+
+
+def _GetConfigFileDiskData(block_devices, blockdev_prefix,
+                           _letters=_DISK_LETTERS):
+  """Get disk directives for Xen config file.
+
+  This method builds the xen config disk directive according to the
+  given disk_template and block_devices.
+
+  @param block_devices: list of tuples (cfdev, rldev):
+      - cfdev: dict containing ganeti config disk part
+      - rldev: ganeti.bdev.BlockDev object
+  @param blockdev_prefix: a string containing blockdevice prefix,
+                          e.g. "sd" for /dev/sda
+
+  @return: string containing disk directive for xen instance config file
+
+  """
+  if len(block_devices) > len(_letters):
+    raise errors.HypervisorError("Too many disks")
+
+  disk_data = []
+
+  for sd_suffix, (cfdev, dev_path) in zip(_letters, block_devices):
+    sd_name = blockdev_prefix + sd_suffix
+
+    if cfdev.mode == constants.DISK_RDWR:
+      mode = "w"
+    else:
+      mode = "r"
+
+    if cfdev.dev_type == constants.LD_FILE:
+      driver = _FILE_DRIVER_MAP[cfdev.physical_id[0]]
+    else:
+      driver = "phy"
+
+    disk_data.append("'%s:%s,%s,%s'" % (driver, dev_path, sd_name, mode))
+
+  return disk_data
+
+
 class XenHypervisor(hv_base.BaseHypervisor):
   """Xen generic hypervisor interface
 
@@ -95,8 +320,48 @@ class XenHypervisor(hv_base.BaseHypervisor):
     XL_CONFIG_FILE,
     ]
 
-  @staticmethod
-  def _ConfigFileName(instance_name):
+  def __init__(self, _cfgdir=None, _run_cmd_fn=None, _cmd=None):
+    hv_base.BaseHypervisor.__init__(self)
+
+    if _cfgdir is None:
+      self._cfgdir = pathutils.XEN_CONFIG_DIR
+    else:
+      self._cfgdir = _cfgdir
+
+    if _run_cmd_fn is None:
+      self._run_cmd_fn = utils.RunCmd
+    else:
+      self._run_cmd_fn = _run_cmd_fn
+
+    self._cmd = _cmd
+
+  def _GetCommand(self):
+    """Returns Xen command to use.
+
+    """
+    if self._cmd is None:
+      # TODO: Make command a hypervisor parameter
+      cmd = constants.XEN_CMD
+    else:
+      cmd = self._cmd
+
+    if cmd not in constants.KNOWN_XEN_COMMANDS:
+      raise errors.ProgrammerError("Unknown Xen command '%s'" % cmd)
+
+    return cmd
+
+  def _RunXen(self, args):
+    """Wrapper around L{utils.process.RunCmd} to run Xen command.
+
+    @see: L{utils.process.RunCmd}
+
+    """
+    cmd = [self._GetCommand()]
+    cmd.extend(args)
+
+    return self._run_cmd_fn(cmd)
+
+  def _ConfigFileName(self, instance_name):
     """Get the config file name for an instance.
 
     @param instance_name: instance name
@@ -105,39 +370,36 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @rtype: str
 
     """
-    return utils.PathJoin(pathutils.XEN_CONFIG_DIR, instance_name)
+    return utils.PathJoin(self._cfgdir, instance_name)
 
   @classmethod
-  def _WriteConfigFile(cls, instance, startup_memory, block_devices):
-    """Write the Xen config file for the instance.
+  def _GetConfig(cls, instance, startup_memory, block_devices):
+    """Build Xen configuration for an instance.
 
     """
     raise NotImplementedError
 
-  @staticmethod
-  def _WriteConfigFileStatic(instance_name, data):
+  def _WriteConfigFile(self, instance_name, data):
     """Write the Xen config file for the instance.
 
     This version of the function just writes the config file from static data.
 
     """
     # just in case it exists
-    utils.RemoveFile(utils.PathJoin(pathutils.XEN_CONFIG_DIR, "auto",
-                                    instance_name))
+    utils.RemoveFile(utils.PathJoin(self._cfgdir, "auto", instance_name))
 
-    cfg_file = XenHypervisor._ConfigFileName(instance_name)
+    cfg_file = self._ConfigFileName(instance_name)
     try:
       utils.WriteFile(cfg_file, data=data)
     except EnvironmentError, err:
       raise errors.HypervisorError("Cannot write Xen instance configuration"
                                    " file %s: %s" % (cfg_file, err))
 
-  @staticmethod
-  def _ReadConfigFile(instance_name):
+  def _ReadConfigFile(self, instance_name):
     """Returns the contents of the instance config file.
 
     """
-    filename = XenHypervisor._ConfigFileName(instance_name)
+    filename = self._ConfigFileName(instance_name)
 
     try:
       file_content = utils.ReadFile(filename)
@@ -146,81 +408,23 @@ class XenHypervisor(hv_base.BaseHypervisor):
 
     return file_content
 
-  @staticmethod
-  def _RemoveConfigFile(instance_name):
+  def _RemoveConfigFile(self, instance_name):
     """Remove the xen configuration file.
 
     """
-    utils.RemoveFile(XenHypervisor._ConfigFileName(instance_name))
+    utils.RemoveFile(self._ConfigFileName(instance_name))
 
-  @staticmethod
-  def _RunXmList(xmlist_errors):
-    """Helper function for L{_GetXMList} to run "xm list".
+  def _GetXmList(self, include_node):
+    """Wrapper around module level L{_GetXmList}.
 
     """
-    result = utils.RunCmd([constants.XEN_CMD, "list"])
-    if result.failed:
-      logging.error("xm list failed (%s): %s", result.fail_reason,
-                    result.output)
-      xmlist_errors.append(result)
-      raise utils.RetryAgain()
-
-    # skip over the heading
-    return result.stdout.splitlines()[1:]
-
-  @classmethod
-  def _GetXMList(cls, include_node):
-    """Return the list of running instances.
-
-    If the include_node argument is True, then we return information
-    for dom0 also, otherwise we filter that from the return value.
-
-    @return: list of (name, id, memory, vcpus, state, time spent)
-
-    """
-    xmlist_errors = []
-    try:
-      lines = utils.Retry(cls._RunXmList, 1, 5, args=(xmlist_errors, ))
-    except utils.RetryTimeout:
-      if xmlist_errors:
-        xmlist_result = xmlist_errors.pop()
-
-        errmsg = ("xm list failed, timeout exceeded (%s): %s" %
-                  (xmlist_result.fail_reason, xmlist_result.output))
-      else:
-        errmsg = "xm list failed"
-
-      raise errors.HypervisorError(errmsg)
-
-    result = []
-    for line in lines:
-      # The format of lines is:
-      # Name      ID Mem(MiB) VCPUs State  Time(s)
-      # Domain-0   0  3418     4 r-----    266.2
-      data = line.split()
-      if len(data) != 6:
-        raise errors.HypervisorError("Can't parse output of xm list,"
-                                     " line: %s" % line)
-      try:
-        data[1] = int(data[1])
-        data[2] = int(data[2])
-        data[3] = int(data[3])
-        data[5] = float(data[5])
-      except (TypeError, ValueError), err:
-        raise errors.HypervisorError("Can't parse output of xm list,"
-                                     " line: %s, error: %s" % (line, err))
-
-      # skip the Domain-0 (optional)
-      if include_node or data[0] != _DOM0_NAME:
-        result.append(data)
-
-    return result
+    return _GetXmList(lambda: self._RunXen(["list"]), include_node)
 
   def ListInstances(self):
     """Get the list of running instances.
 
     """
-    xm_list = self._GetXMList(False)
+    xm_list = self._GetXmList(False)
     names = [info[0] for info in xm_list]
     return names
 
@@ -232,7 +436,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @return: tuple (name, id, memory, vcpus, stat, times)
 
     """
-    xm_list = self._GetXMList(instance_name == _DOM0_NAME)
+    xm_list = self._GetXmList(instance_name == _DOM0_NAME)
     result = None
     for data in xm_list:
       if data[0] == instance_name:
@@ -246,21 +450,37 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @return: list of tuples (name, id, memory, vcpus, stat, times)
 
     """
-    xm_list = self._GetXMList(False)
+    xm_list = self._GetXmList(False)
     return xm_list
 
+  def _MakeConfigFile(self, instance, startup_memory, block_devices):
+    """Gather configuration details and write to disk.
+
+    See L{_GetConfig} for arguments.
+
+    """
+    buf = StringIO()
+    buf.write("# Automatically generated by Ganeti. Do not edit!\n")
+    buf.write("\n")
+    buf.write(self._GetConfig(instance, startup_memory, block_devices))
+    buf.write("\n")
+
+    self._WriteConfigFile(instance.name, buf.getvalue())
+
   def StartInstance(self, instance, block_devices, startup_paused):
     """Start an instance.
 
     """
     startup_memory = self._InstanceStartupMemory(instance)
-    self._WriteConfigFile(instance, startup_memory, block_devices)
-    cmd = [constants.XEN_CMD, "create"]
+
+    self._MakeConfigFile(instance, startup_memory, block_devices)
+
+    cmd = ["create"]
     if startup_paused:
-      cmd.extend(["-p"])
-    cmd.extend([self._ConfigFileName(instance.name)])
-    result = utils.RunCmd(cmd)
+      cmd.append("-p")
+    cmd.append(self._ConfigFileName(instance.name))
 
+    result = self._RunXen(cmd)
     if result.failed:
       raise errors.HypervisorError("Failed to start instance %s: %s (%s)" %
                                    (instance.name, result.fail_reason,
@@ -273,12 +493,18 @@ class XenHypervisor(hv_base.BaseHypervisor):
     if name is None:
       name = instance.name
 
+    return self._StopInstance(name, force)
+
+  def _StopInstance(self, name, force):
+    """Stop an instance.
+
+    """
     if force:
-      command = [constants.XEN_CMD, "destroy", name]
+      action = "destroy"
     else:
-      command = [constants.XEN_CMD, "shutdown", name]
-    result = utils.RunCmd(command)
+      action = "shutdown"
 
+    result = self._RunXen([action, name])
     if result.failed:
       raise errors.HypervisorError("Failed to stop instance %s: %s, %s" %
                                    (name, result.fail_reason, result.output))
@@ -296,7 +522,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
       raise errors.HypervisorError("Failed to reboot instance %s,"
                                    " not running" % instance.name)
 
-    result = utils.RunCmd([constants.XEN_CMD, "reboot", instance.name])
+    result = self._RunXen(["reboot", instance.name])
     if result.failed:
       raise errors.HypervisorError("Failed to reboot instance %s: %s, %s" %
                                    (instance.name, result.fail_reason,
@@ -329,14 +555,16 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @param mem: actual memory size to use for instance runtime
 
     """
-    cmd = [constants.XEN_CMD, "mem-set", instance.name, mem]
-    result = utils.RunCmd(cmd)
+    result = self._RunXen(["mem-set", instance.name, mem])
     if result.failed:
       raise errors.HypervisorError("Failed to balloon instance %s: %s (%s)" %
                                    (instance.name, result.fail_reason,
                                     result.output))
+
+    # Update configuration file
     cmd = ["sed", "-ie", "s/^memory.*$/memory = %s/" % mem]
-    cmd.append(XenHypervisor._ConfigFileName(instance.name))
+    cmd.append(self._ConfigFileName(instance.name))
+
     result = utils.RunCmd(cmd)
     if result.failed:
       raise errors.HypervisorError("Failed to update memory for %s: %s (%s)" %
@@ -346,80 +574,16 @@ class XenHypervisor(hv_base.BaseHypervisor):
   def GetNodeInfo(self):
     """Return information about the node.
 
-    @return: a dict with the following keys (memory values in MiB):
-          - memory_total: the total memory size on the node
-          - memory_free: the available memory on the node for instances
-          - memory_dom0: the memory used by the node itself, if available
-          - nr_cpus: total number of CPUs
-          - nr_nodes: in a NUMA system, the number of domains
-          - nr_sockets: the number of physical CPU sockets in the node
-          - hv_version: the hypervisor version in the form (major, minor)
+    @see: L{_GetNodeInfo} and L{_ParseNodeInfo}
 
     """
-    result = utils.RunCmd([constants.XEN_CMD, "info"])
+    result = self._RunXen(["info"])
     if result.failed:
       logging.error("Can't run 'xm info' (%s): %s", result.fail_reason,
                     result.output)
       return None
 
-    xmoutput = result.stdout.splitlines()
-    result = {}
-    cores_per_socket = threads_per_core = nr_cpus = None
-    xen_major, xen_minor = None, None
-    memory_total = None
-    memory_free = None
-
-    for line in xmoutput:
-      splitfields = line.split(":", 1)
-
-      if len(splitfields) > 1:
-        key = splitfields[0].strip()
-        val = splitfields[1].strip()
-
-        # note: in xen 3, memory has changed to total_memory
-        if key == "memory" or key == "total_memory":
-          memory_total = int(val)
-        elif key == "free_memory":
-          memory_free = int(val)
-        elif key == "nr_cpus":
-          nr_cpus = result["cpu_total"] = int(val)
-        elif key == "nr_nodes":
-          result["cpu_nodes"] = int(val)
-        elif key == "cores_per_socket":
-          cores_per_socket = int(val)
-        elif key == "threads_per_core":
-          threads_per_core = int(val)
-        elif key == "xen_major":
-          xen_major = int(val)
-        elif key == "xen_minor":
-          xen_minor = int(val)
-
-    if None not in [cores_per_socket, threads_per_core, nr_cpus]:
-      result["cpu_sockets"] = nr_cpus / (cores_per_socket * threads_per_core)
-
-    total_instmem = 0
-    for (name, _, mem, vcpus, _, _) in self._GetXMList(True):
-      if name == _DOM0_NAME:
-        result["memory_dom0"] = mem
-        result["dom0_cpus"] = vcpus
-
-      # Include Dom0 in total memory usage
-      total_instmem += mem
-
-    if memory_free is not None:
-      result["memory_free"] = memory_free
-
-    if memory_total is not None:
-      result["memory_total"] = memory_total
-
-    # Calculate memory used by hypervisor
-    if None not in [memory_total, memory_free, total_instmem]:
-      result["memory_hv"] = memory_total - memory_free - total_instmem
-
-    if not (xen_major is None or xen_minor is None):
-      result[constants.HV_NODEINFO_KEY_VERSION] = (xen_major, xen_minor)
-
-    return result
+    return _GetNodeInfo(result.stdout, self._GetXmList)
 
   @classmethod
   def GetInstanceConsole(cls, instance, hvparams, beparams):
@@ -441,51 +605,12 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @return: Problem description if something is wrong, C{None} otherwise
 
     """
-    result = utils.RunCmd([constants.XEN_CMD, "info"])
+    result = self._RunXen(["info"])
     if result.failed:
       return "'xm info' failed: %s, %s" % (result.fail_reason, result.output)
 
     return None
 
-  @staticmethod
-  def _GetConfigFileDiskData(block_devices, blockdev_prefix):
-    """Get disk directive for xen config file.
-
-    This method builds the xen config disk directive according to the
-    given disk_template and block_devices.
-
-    @param block_devices: list of tuples (cfdev, rldev):
-        - cfdev: dict containing ganeti config disk part
-        - rldev: ganeti.bdev.BlockDev object
-    @param blockdev_prefix: a string containing blockdevice prefix,
-                            e.g. "sd" for /dev/sda
-
-    @return: string containing disk directive for xen instance config file
-
-    """
-    FILE_DRIVER_MAP = {
-      constants.FD_LOOP: "file",
-      constants.FD_BLKTAP: "tap:aio",
-      }
-    disk_data = []
-    if len(block_devices) > 24:
-      # 'z' - 'a' = 24
-      raise errors.HypervisorError("Too many disks")
-    namespace = [blockdev_prefix + chr(i + ord("a")) for i in range(24)]
-    for sd_name, (cfdev, dev_path) in zip(namespace, block_devices):
-      if cfdev.mode == constants.DISK_RDWR:
-        mode = "w"
-      else:
-        mode = "r"
-      if cfdev.dev_type == constants.LD_FILE:
-        line = "'%s:%s,%s,%s'" % (FILE_DRIVER_MAP[cfdev.physical_id[0]],
-                                  dev_path, sd_name, mode)
-      else:
-        line = "'phy:%s,%s,%s'" % (dev_path, sd_name, mode)
-      disk_data.append(line)
-
-    return disk_data
-
   def MigrationInfo(self, instance):
     """Get instance information to perform a migration.
 
@@ -525,7 +650,7 @@ class XenHypervisor(hv_base.BaseHypervisor):
 
     """
     if success:
-      self._WriteConfigFileStatic(instance.name, info)
+      self._WriteConfigFile(instance.name, info)
 
   def MigrateInstance(self, instance, target, live):
     """Migrate an instance to a target node.
@@ -541,34 +666,53 @@ class XenHypervisor(hv_base.BaseHypervisor):
     @param live: perform a live migration
 
     """
-    if self.GetInstanceInfo(instance.name) is None:
+    port = instance.hvparams[constants.HV_MIGRATION_PORT]
+
+    # TODO: Pass cluster name via RPC
+    cluster_name = ssconf.SimpleStore().GetClusterName()
+
+    return self._MigrateInstance(cluster_name, instance.name, target, port,
+                                 live)
+
+  def _MigrateInstance(self, cluster_name, instance_name, target, port, live,
+                       _ping_fn=netutils.TcpPing):
+    """Migrate an instance to a target node.
+
+    @see: L{MigrateInstance} for details
+
+    """
+    if self.GetInstanceInfo(instance_name) is None:
       raise errors.HypervisorError("Instance not running, cannot migrate")
 
-    port = instance.hvparams[constants.HV_MIGRATION_PORT]
+    cmd = self._GetCommand()
 
-    if (constants.XEN_CMD == constants.XEN_CMD_XM and
-        not netutils.TcpPing(target, port, live_port_needed=True)):
+    if (cmd == constants.XEN_CMD_XM and
+        not _ping_fn(target, port, live_port_needed=True)):
       raise errors.HypervisorError("Remote host %s not listening on port"
                                    " %s, cannot migrate" % (target, port))
 
-    args = [constants.XEN_CMD, "migrate"]
-    if constants.XEN_CMD == constants.XEN_CMD_XM:
+    args = ["migrate"]
+
+    if cmd == constants.XEN_CMD_XM:
       args.extend(["-p", "%d" % port])
       if live:
         args.append("-l")
-    elif constants.XEN_CMD == constants.XEN_CMD_XL:
-      cluster_name = ssconf.SimpleStore().GetClusterName()
-      args.extend(["-s", constants.XL_SSH_CMD % cluster_name])
-      args.extend(["-C", self._ConfigFileName(instance.name)])
+
+    elif cmd == constants.XEN_CMD_XL:
+      args.extend([
+        "-s", constants.XL_SSH_CMD % cluster_name,
+        "-C", self._ConfigFileName(instance_name),
+        ])
+
     else:
-      raise errors.HypervisorError("Unsupported xen command: %s" %
-                                   constants.XEN_CMD)
+      raise errors.HypervisorError("Unsupported Xen command: %s" % self._cmd)
 
-    args.extend([instance.name, target])
-    result = utils.RunCmd(args)
+    args.extend([instance_name, target])
+
+    result = self._RunXen(args)
     if result.failed:
       raise errors.HypervisorError("Failed to migrate instance %s: %s" %
-                                   (instance.name, result.output))
+                                   (instance_name, result.output))
 
   def FinalizeMigrationSource(self, instance, success, live):
     """Finalize the instance migration on the source node.
@@ -647,8 +791,7 @@ class XenPvmHypervisor(XenHypervisor):
       (False, lambda x: 0 < x < 65536, "invalid weight", None, None),
     }
 
-  @classmethod
-  def _WriteConfigFile(cls, instance, startup_memory, block_devices):
+  def _GetConfig(self, instance, startup_memory, block_devices):
     """Write the Xen config file for the instance.
 
     """
@@ -706,8 +849,8 @@ class XenPvmHypervisor(XenHypervisor):
         nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK]
       vif_data.append("'%s'" % nic_str)
 
-    disk_data = cls._GetConfigFileDiskData(block_devices,
-                                           hvp[constants.HV_BLOCKDEV_PREFIX])
+    disk_data = \
+      _GetConfigFileDiskData(block_devices, hvp[constants.HV_BLOCKDEV_PREFIX])
 
     config.write("vif = [%s]\n" % ",".join(vif_data))
     config.write("disk = [%s]\n" % ",".join(disk_data))
@@ -721,9 +864,8 @@ class XenPvmHypervisor(XenHypervisor):
       config.write("on_reboot = 'destroy'\n")
     config.write("on_crash = 'restart'\n")
     config.write("extra = '%s'\n" % hvp[constants.HV_KERNEL_ARGS])
-    cls._WriteConfigFileStatic(instance.name, config.getvalue())
 
-    return True
+    return config.getvalue()
 
 
 class XenHvmHypervisor(XenHypervisor):
@@ -769,15 +911,13 @@ class XenHvmHypervisor(XenHypervisor):
       (False, lambda x: 0 < x < 65535, "invalid weight", None, None),
     }
 
-  @classmethod
-  def _WriteConfigFile(cls, instance, startup_memory, block_devices):
+  def _GetConfig(self, instance, startup_memory, block_devices):
     """Create a Xen 3.1 HVM config file.
 
     """
     hvp = instance.hvparams
 
     config = StringIO()
-    config.write("# this is autogenerated by Ganeti, please do not edit\n#\n")
 
     # kernel handling
     kpath = hvp[constants.HV_KERNEL_PATH]
@@ -859,8 +999,8 @@ class XenHvmHypervisor(XenHypervisor):
 
     config.write("vif = [%s]\n" % ",".join(vif_data))
 
-    disk_data = cls._GetConfigFileDiskData(block_devices,
-                                           hvp[constants.HV_BLOCKDEV_PREFIX])
+    disk_data = \
+      _GetConfigFileDiskData(block_devices, hvp[constants.HV_BLOCKDEV_PREFIX])
 
     iso_path = hvp[constants.HV_CDROM_IMAGE_PATH]
     if iso_path:
@@ -880,6 +1020,5 @@ class XenHvmHypervisor(XenHypervisor):
     else:
       config.write("on_reboot = 'destroy'\n")
     config.write("on_crash = 'restart'\n")
-    cls._WriteConfigFileStatic(instance.name, config.getvalue())
 
-    return True
+    return config.getvalue()
index a1c51c5..170a8b7 100644 (file)
@@ -29,6 +29,9 @@ from bitarray import bitarray
 
 from ganeti import errors
 
+IPV4_NETWORK_MIN_SIZE = 30
+IPV4_NETWORK_MIN_NUM_HOSTS = 2 ** (32 - IPV4_NETWORK_MIN_SIZE)
+
 
 class AddressPool(object):
   """Address pool class, wrapping an C{objects.Network} object.
@@ -55,6 +58,12 @@ class AddressPool(object):
     self.net = network
 
     self.network = ipaddr.IPNetwork(self.net.network)
+    if self.network.numhosts < IPV4_NETWORK_MIN_NUM_HOSTS:
+      raise errors.AddressPoolError("A network with only %s host(s) is too"
+                                    " small, please specify at least a /%s"
+                                    " network" %
+                                    (str(self.network.numhosts),
+                                     IPV4_NETWORK_MIN_SIZE))
     if self.net.gateway:
       self.gateway = ipaddr.IPAddress(self.net.gateway)
 
index 588ef03..fa811a6 100644 (file)
@@ -264,47 +264,6 @@ class ConfigObject(outils.ValidatedSlots):
     obj = cls(**val_str) # pylint: disable=W0142
     return obj
 
-  @staticmethod
-  def _ContainerToDicts(container):
-    """Convert the elements of a container to standard python types.
-
-    This method converts a container with elements derived from
-    ConfigData to standard python types. If the container is a dict,
-    we don't touch the keys, only the values.
-
-    """
-    if isinstance(container, dict):
-      ret = dict([(k, v.ToDict()) for k, v in container.iteritems()])
-    elif isinstance(container, (list, tuple, set, frozenset)):
-      ret = [elem.ToDict() for elem in container]
-    else:
-      raise TypeError("Invalid type %s passed to _ContainerToDicts" %
-                      type(container))
-    return ret
-
-  @staticmethod
-  def _ContainerFromDicts(source, c_type, e_type):
-    """Convert a container from standard python types.
-
-    This method converts a container with standard python types to
-    ConfigData objects. If the container is a dict, we don't touch the
-    keys, only the values.
-
-    """
-    if not isinstance(c_type, type):
-      raise TypeError("Container type %s passed to _ContainerFromDicts is"
-                      " not a type" % type(c_type))
-    if source is None:
-      source = c_type()
-    if c_type is dict:
-      ret = dict([(k, e_type.FromDict(v)) for k, v in source.iteritems()])
-    elif c_type in (list, tuple, set, frozenset):
-      ret = c_type([e_type.FromDict(elem) for elem in source])
-    else:
-      raise TypeError("Invalid container type %s passed to"
-                      " _ContainerFromDicts" % c_type)
-    return ret
-
   def Copy(self):
     """Makes a deep copy of the current object and its children.
 
@@ -447,7 +406,7 @@ class ConfigData(ConfigObject):
     mydict = super(ConfigData, self).ToDict()
     mydict["cluster"] = mydict["cluster"].ToDict()
     for key in "nodes", "instances", "nodegroups", "networks":
-      mydict[key] = self._ContainerToDicts(mydict[key])
+      mydict[key] = outils.ContainerToDicts(mydict[key])
 
     return mydict
 
@@ -458,10 +417,12 @@ class ConfigData(ConfigObject):
     """
     obj = super(ConfigData, cls).FromDict(val)
     obj.cluster = Cluster.FromDict(obj.cluster)
-    obj.nodes = cls._ContainerFromDicts(obj.nodes, dict, Node)
-    obj.instances = cls._ContainerFromDicts(obj.instances, dict, Instance)
-    obj.nodegroups = cls._ContainerFromDicts(obj.nodegroups, dict, NodeGroup)
-    obj.networks = cls._ContainerFromDicts(obj.networks, dict, Network)
+    obj.nodes = outils.ContainerFromDicts(obj.nodes, dict, Node)
+    obj.instances = \
+      outils.ContainerFromDicts(obj.instances, dict, Instance)
+    obj.nodegroups = \
+      outils.ContainerFromDicts(obj.nodegroups, dict, NodeGroup)
+    obj.networks = outils.ContainerFromDicts(obj.networks, dict, Network)
     return obj
 
   def HasAnyDiskOfType(self, dev_type):
@@ -771,7 +732,7 @@ class Disk(ConfigObject):
     for attr in ("children",):
       alist = bo.get(attr, None)
       if alist:
-        bo[attr] = self._ContainerToDicts(alist)
+        bo[attr] = outils.ContainerToDicts(alist)
     return bo
 
   @classmethod
@@ -781,7 +742,7 @@ class Disk(ConfigObject):
     """
     obj = super(Disk, cls).FromDict(val)
     if obj.children:
-      obj.children = cls._ContainerFromDicts(obj.children, list, Disk)
+      obj.children = outils.ContainerFromDicts(obj.children, list, Disk)
     if obj.logical_id and isinstance(obj.logical_id, list):
       obj.logical_id = tuple(obj.logical_id)
     if obj.physical_id and isinstance(obj.physical_id, list):
@@ -1139,7 +1100,7 @@ class Instance(TaggableObject):
     for attr in "nics", "disks":
       alist = bo.get(attr, None)
       if alist:
-        nlist = self._ContainerToDicts(alist)
+        nlist = outils.ContainerToDicts(alist)
       else:
         nlist = []
       bo[attr] = nlist
@@ -1158,8 +1119,8 @@ class Instance(TaggableObject):
     if "admin_up" in val:
       del val["admin_up"]
     obj = super(Instance, cls).FromDict(val)
-    obj.nics = cls._ContainerFromDicts(obj.nics, list, NIC)
-    obj.disks = cls._ContainerFromDicts(obj.disks, list, Disk)
+    obj.nics = outils.ContainerFromDicts(obj.nics, list, NIC)
+    obj.disks = outils.ContainerFromDicts(obj.disks, list, Disk)
     return obj
 
   def UpgradeConfig(self):
@@ -1353,12 +1314,12 @@ class Node(TaggableObject):
 
     hv_state = data.get("hv_state", None)
     if hv_state is not None:
-      data["hv_state"] = self._ContainerToDicts(hv_state)
+      data["hv_state"] = outils.ContainerToDicts(hv_state)
 
     disk_state = data.get("disk_state", None)
     if disk_state is not None:
       data["disk_state"] = \
-        dict((key, self._ContainerToDicts(value))
+        dict((key, outils.ContainerToDicts(value))
              for (key, value) in disk_state.items())
 
     return data
@@ -1371,11 +1332,12 @@ class Node(TaggableObject):
     obj = super(Node, cls).FromDict(val)
 
     if obj.hv_state is not None:
-      obj.hv_state = cls._ContainerFromDicts(obj.hv_state, dict, NodeHvState)
+      obj.hv_state = \
+        outils.ContainerFromDicts(obj.hv_state, dict, NodeHvState)
 
     if obj.disk_state is not None:
       obj.disk_state = \
-        dict((key, cls._ContainerFromDicts(value, dict, NodeDiskState))
+        dict((key, outils.ContainerFromDicts(value, dict, NodeDiskState))
              for (key, value) in obj.disk_state.items())
 
     return obj
@@ -1632,7 +1594,14 @@ class Cluster(TaggableObject):
 
     """
     mydict = super(Cluster, self).ToDict()
-    mydict["tcpudp_port_pool"] = list(self.tcpudp_port_pool)
+
+    if self.tcpudp_port_pool is None:
+      tcpudp_port_pool = []
+    else:
+      tcpudp_port_pool = list(self.tcpudp_port_pool)
+
+    mydict["tcpudp_port_pool"] = tcpudp_port_pool
+
     return mydict
 
   @classmethod
@@ -1641,8 +1610,12 @@ class Cluster(TaggableObject):
 
     """
     obj = super(Cluster, cls).FromDict(val)
-    if not isinstance(obj.tcpudp_port_pool, set):
+
+    if obj.tcpudp_port_pool is None:
+      obj.tcpudp_port_pool = set()
+    elif not isinstance(obj.tcpudp_port_pool, set):
       obj.tcpudp_port_pool = set(obj.tcpudp_port_pool)
+
     return obj
 
   def SimpleFillDP(self, diskparams):
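
The two hunks above make the port pool safe to round-trip when it was never
initialized. A minimal sketch of the intended behaviour (illustrative only,
relying on ConfigObject's convention that unset slots read as None):

    # A cluster whose reserved TCP/UDP port pool was never set.
    cluster = Cluster.FromDict({})
    assert cluster.tcpudp_port_pool == set()   # None is normalized to set()
    data = cluster.ToDict()
    assert data["tcpudp_port_pool"] == []      # serialized as [], never None
    assert Cluster.FromDict(data).tcpudp_port_pool == set()
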
@@ -1933,7 +1906,7 @@ class _QueryResponseBase(ConfigObject):
 
     """
     mydict = super(_QueryResponseBase, self).ToDict()
-    mydict["fields"] = self._ContainerToDicts(mydict["fields"])
+    mydict["fields"] = outils.ContainerToDicts(mydict["fields"])
     return mydict
 
   @classmethod
@@ -1942,7 +1915,8 @@ class _QueryResponseBase(ConfigObject):
 
     """
     obj = super(_QueryResponseBase, cls).FromDict(val)
-    obj.fields = cls._ContainerFromDicts(obj.fields, list, QueryFieldDefinition)
+    obj.fields = \
+      outils.ContainerFromDicts(obj.fields, list, QueryFieldDefinition)
     return obj
 
 
index 73d5cb3..3015a54 100644 (file)
@@ -1957,7 +1957,7 @@ class OpTestDelay(OpCode):
   This is used just for debugging and testing.
 
   Parameters:
-    - duration: the time to sleep
+    - duration: the time to sleep, in seconds
     - on_master: if true, sleep on the master
     - on_nodes: list of nodes in which to sleep
 
index e2742b1..f0f6558 100644 (file)
 """Module for object related utils."""
 
 
+#: Supported container types for serialization/de-serialization (must be a
+#: tuple as it's used as a parameter for C{isinstance})
+_SEQUENCE_TYPES = (list, tuple, set, frozenset)
+
+
 class AutoSlots(type):
   """Meta base class for __slots__ definitions.
 
@@ -91,3 +96,55 @@ class ValidatedSlots(object):
 
     """
     raise NotImplementedError
+
+
+def ContainerToDicts(container):
+  """Convert the elements of a container to standard Python types.
+
+  This method converts each element of a container to standard Python types.
+  If the input container is a C{dict}, only its values are touched.
+  Those values, as well as all elements of input sequences, must support a
+  C{ToDict} method returning a serialized version.
+
+  @type container: dict or sequence (see L{_SEQUENCE_TYPES})
+
+  """
+  if isinstance(container, dict):
+    ret = dict([(k, v.ToDict()) for k, v in container.items()])
+  elif isinstance(container, _SEQUENCE_TYPES):
+    ret = [elem.ToDict() for elem in container]
+  else:
+    raise TypeError("Unknown container type '%s'" % type(container))
+
+  return ret
+
+
+def ContainerFromDicts(source, c_type, e_type):
+  """Convert a container from standard python types.
+
+  This method converts a container of standard Python types back into
+  objects. If the container is a C{dict}, only its values are converted.
+
+  @type source: None, dict or sequence (see L{_SEQUENCE_TYPES})
+  @param source: Input data
+  @type c_type: type class
+  @param c_type: Desired type for returned container
+  @type e_type: element type class
+  @param e_type: Item type for elements in returned container (must have a
+    C{FromDict} class method)
+
+  """
+  if not isinstance(c_type, type):
+    raise TypeError("Container type '%s' is not a type" % type(c_type))
+
+  if source is None:
+    source = c_type()
+
+  if c_type is dict:
+    ret = dict([(k, e_type.FromDict(v)) for k, v in source.items()])
+  elif c_type in _SEQUENCE_TYPES:
+    ret = c_type(map(e_type.FromDict, source))
+  else:
+    raise TypeError("Unknown container type '%s'" % c_type)
+
+  return ret
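
Taken together, the two helpers form a symmetric (de)serialization pair. A
minimal round-trip sketch with a hypothetical element type (any class
providing C{ToDict} and a C{FromDict} class method works, e.g. the config
objects above):

    class _Point(object):
      # Hypothetical element type, for illustration only.
      def __init__(self, x):
        self.x = x

      def ToDict(self):
        return {"x": self.x}

      @classmethod
      def FromDict(cls, data):
        return cls(data["x"])

    serialized = ContainerToDicts([_Point(1), _Point(2)])
    # serialized == [{"x": 1}, {"x": 2}]
    restored = ContainerFromDicts(serialized, list, _Point)
    assert [p.x for p in restored] == [1, 2]
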
index 3a28a84..4e07d34 100644 (file)
@@ -1,7 +1,7 @@
 #
 #
 
-# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
+# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -397,7 +397,7 @@ class R_2_nodes(baserlib.OpcodeResource):
     """Returns a list of all nodes.
 
     """
-    client = self.GetClient(query=False)
+    client = self.GetClient(query=True)
 
     if self.useBulk():
       bulkdata = client.QueryNodes([], N_FIELDS, False)
@@ -420,7 +420,7 @@ class R_2_nodes_name(baserlib.OpcodeResource):
 
     """
     node_name = self.items[0]
-    client = self.GetClient(query=False)
+    client = self.GetClient(query=True)
 
     result = baserlib.HandleItemQueryErrors(client.QueryNodes,
                                             names=[node_name], fields=N_FIELDS,
index 3b6f6f3..7c98c6f 100644 (file)
@@ -37,9 +37,10 @@ def TestDelay(duration):
   """Sleep for a fixed amount of time.
 
   @type duration: float
-  @param duration: the sleep duration
-  @rtype: boolean
-  @return: False for negative value, True otherwise
+  @param duration: the sleep duration, in seconds
+  @rtype: (boolean, str)
+  @return: False and an accompanying error message for a negative value;
+      True otherwise (with the message set to None)
 
   """
   if duration < 0:
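
A sketch of what a caller now sees, assuming the function body was updated
to match the docstring and returns a (status, message) pair:

    (success, msg) = TestDelay(-1)
    assert not success and msg       # rejected with an error message
    (success, msg) = TestDelay(0.1)
    assert success and msg is None
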
index ed28cab..407c352 100644 (file)
@@ -142,7 +142,7 @@ def GetVirtualHostname():
   return _VIRT_HOSTNAME
 
 
-def _MakeNodeRoot(base, node_name):
+def MakeNodeRoot(base, node_name):
   """Appends a node name to the base directory.
 
   """
@@ -162,7 +162,7 @@ def ExchangeNodeRoot(node_name, filename,
   """
   if _basedir:
     pure = _RemoveNodePrefix(filename, _noderoot=_noderoot)
-    result = "%s/%s" % (_MakeNodeRoot(_basedir, node_name), pure)
+    result = "%s/%s" % (MakeNodeRoot(_basedir, node_name), pure)
   else:
     result = filename
 
@@ -175,7 +175,7 @@ def EnvironmentForHost(hostname, _basedir=_VIRT_BASEDIR):
   """
   if _basedir:
     return {
-      _ROOTDIR_ENVNAME: _MakeNodeRoot(_basedir, hostname),
+      _ROOTDIR_ENVNAME: MakeNodeRoot(_basedir, hostname),
       _HOSTNAME_ENVNAME: hostname,
       }
   else:
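
The rename merely promotes the helper to the module's public interface; its
behaviour is unchanged. Per the docstring (hypothetical values):

    # Appends the node name to the virtual-cluster base directory.
    root = MakeNodeRoot("/srv/ganeti/vcluster", "node1.example.com")
    # root == "/srv/ganeti/vcluster/node1.example.com"
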
index 41ad321..a346eac 100755 (executable)
@@ -37,6 +37,7 @@ import qa_env
 import qa_error
 import qa_group
 import qa_instance
+import qa_network
 import qa_node
 import qa_os
 import qa_job
@@ -275,8 +276,8 @@ def RunCommonInstanceTests(instance):
   if qa_config.TestEnabled("instance-rename"):
     tgt_instance = qa_config.AcquireInstance()
     try:
-      rename_source = instance["name"]
-      rename_target = tgt_instance["name"]
+      rename_source = instance.name
+      rename_target = tgt_instance.name
       # perform instance rename to the same name
       RunTest(qa_instance.TestInstanceRenameAndBack,
               rename_source, rename_source)
@@ -289,7 +290,7 @@ def RunCommonInstanceTests(instance):
         RunTestIf("rapi", qa_rapi.TestRapiInstanceRenameAndBack,
                   rename_source, rename_target)
     finally:
-      qa_config.ReleaseInstance(tgt_instance)
+      tgt_instance.Release()
 
   RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)
 
@@ -328,6 +329,14 @@ def RunGroupListTests():
   RunTestIf("group-list", qa_group.TestGroupListFields)
 
 
+def RunNetworkTests():
+  """Run tests for network management.
+
+  """
+  RunTestIf("network", qa_network.TestNetworkAddRemove)
+  RunTestIf("network", qa_network.TestNetworkConnect)
+
+
 def RunGroupRwTests():
   """Run tests for adding/removing/renaming groups.
 
@@ -366,9 +375,9 @@ def RunExportImportTests(instance, inodes):
           RunTest(qa_instance.TestInstanceStartup, newinst)
           RunTest(qa_instance.TestInstanceRemove, newinst)
         finally:
-          qa_config.ReleaseInstance(newinst)
+          newinst.Release()
     finally:
-      qa_config.ReleaseNode(expnode)
+      expnode.Release()
 
   if qa_config.TestEnabled(["rapi", "inter-cluster-instance-move"]):
     newinst = qa_config.AcquireInstance()
@@ -378,9 +387,9 @@ def RunExportImportTests(instance, inodes):
         RunTest(qa_rapi.TestInterClusterInstanceMove, instance, newinst,
                 inodes, tnode)
       finally:
-        qa_config.ReleaseNode(tnode)
+        tnode.Release()
     finally:
-      qa_config.ReleaseInstance(newinst)
+      newinst.Release()
 
 
 def RunDaemonTests(instance):
@@ -460,26 +469,40 @@ def RunExclusiveStorageTests():
     if qa_config.TestEnabled("instance-add-plain-disk"):
       # Make sure that the cluster doesn't have any pre-existing problem
       qa_cluster.AssertClusterVerify()
+
+      # Create and allocate instances
       instance1 = qa_instance.TestInstanceAddWithPlainDisk([node])
-      instance2 = qa_instance.TestInstanceAddWithPlainDisk([node])
-      # cluster-verify checks that disks are allocated correctly
-      qa_cluster.AssertClusterVerify()
-      qa_instance.TestInstanceRemove(instance1)
-      qa_instance.TestInstanceRemove(instance2)
+      try:
+        instance2 = qa_instance.TestInstanceAddWithPlainDisk([node])
+        try:
+          # cluster-verify checks that disks are allocated correctly
+          qa_cluster.AssertClusterVerify()
+
+          # Remove instances
+          qa_instance.TestInstanceRemove(instance2)
+          qa_instance.TestInstanceRemove(instance1)
+        finally:
+          instance2.Release()
+      finally:
+        instance1.Release()
+
     if qa_config.TestEnabled("instance-add-drbd-disk"):
       snode = qa_config.AcquireNode()
       try:
         qa_cluster.TestSetExclStorCluster(False)
         instance = qa_instance.TestInstanceAddWithDrbdDisk([node, snode])
-        qa_cluster.TestSetExclStorCluster(True)
-        exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
-        qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
-        qa_instance.TestInstanceRemove(instance)
+        try:
+          qa_cluster.TestSetExclStorCluster(True)
+          exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
+          qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
+          qa_instance.TestInstanceRemove(instance)
+        finally:
+          instance.Release()
       finally:
-        qa_config.ReleaseNode(snode)
+        snode.Release()
     qa_cluster.TestSetExclStorCluster(old_es)
   finally:
-    qa_config.ReleaseNode(node)
+    node.Release()
 
 
 def RunInstanceTests():
@@ -497,25 +520,28 @@ def RunInstanceTests():
       inodes = qa_config.AcquireManyNodes(num_nodes)
       try:
         instance = RunTest(create_fun, inodes)
-
-        RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
-        RunDaemonTests(instance)
-        for node in inodes:
-          RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node)
-        if len(inodes) > 1:
-          RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
-                    constants.INITIAL_NODE_GROUP_NAME,
-                    inodes[0]["primary"], inodes[1]["primary"])
-        if qa_config.TestEnabled("instance-convert-disk"):
-          RunTest(qa_instance.TestInstanceShutdown, instance)
-          RunTest(qa_instance.TestInstanceConvertDiskToPlain, instance, inodes)
-          RunTest(qa_instance.TestInstanceStartup, instance)
-        RunCommonInstanceTests(instance)
-        RunGroupListTests()
-        RunExportImportTests(instance, inodes)
-        RunHardwareFailureTests(instance, inodes)
-        RunRepairDiskSizes()
-        RunTest(qa_instance.TestInstanceRemove, instance)
+        try:
+          RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
+          RunDaemonTests(instance)
+          for node in inodes:
+            RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node)
+          if len(inodes) > 1:
+            RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
+                      constants.INITIAL_NODE_GROUP_NAME,
+                      inodes[0].primary, inodes[1].primary)
+          if qa_config.TestEnabled("instance-convert-disk"):
+            RunTest(qa_instance.TestInstanceShutdown, instance)
+            RunTest(qa_instance.TestInstanceConvertDiskToPlain,
+                    instance, inodes)
+            RunTest(qa_instance.TestInstanceStartup, instance)
+          RunCommonInstanceTests(instance)
+          RunGroupListTests()
+          RunExportImportTests(instance, inodes)
+          RunHardwareFailureTests(instance, inodes)
+          RunRepairDiskSizes()
+          RunTest(qa_instance.TestInstanceRemove, instance)
+        finally:
+          instance.Release()
         del instance
       finally:
         qa_config.ReleaseManyNodes(inodes)
@@ -543,6 +569,7 @@ def RunQa():
   RunCommonNodeTests()
   RunGroupListTests()
   RunGroupRwTests()
+  RunNetworkTests()
 
   # The master shouldn't be readded or put offline; "delay" needs a non-master
   # node to test
@@ -552,7 +579,7 @@ def RunQa():
     RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
     RunTestIf("delay", qa_cluster.TestDelay, pnode)
   finally:
-    qa_config.ReleaseNode(pnode)
+    pnode.Release()
 
   # Make sure the cluster is clean before running instance tests
   qa_cluster.AssertClusterVerify()
@@ -568,13 +595,16 @@ def RunQa():
         for use_client in [True, False]:
           rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
                                   use_client)
-          if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
-            RunCommonInstanceTests(rapi_instance)
-          RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
+          try:
+            if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
+              RunCommonInstanceTests(rapi_instance)
+            RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
+          finally:
+            rapi_instance.Release()
           del rapi_instance
 
   finally:
-    qa_config.ReleaseNode(pnode)
+    pnode.Release()
 
   config_list = [
     ("default-instance-tests", lambda: None, lambda _: None),
@@ -593,21 +623,24 @@ def RunQa():
     if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
       for shutdown in [False, True]:
         instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [pnode])
-        expnode = qa_config.AcquireNode(exclude=pnode)
         try:
-          if shutdown:
-            # Stop instance before exporting and removing it
-            RunTest(qa_instance.TestInstanceShutdown, instance)
-          RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
-          RunTest(qa_instance.TestBackupList, expnode)
+          expnode = qa_config.AcquireNode(exclude=pnode)
+          try:
+            if shutdown:
+              # Stop instance before exporting and removing it
+              RunTest(qa_instance.TestInstanceShutdown, instance)
+            RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
+            RunTest(qa_instance.TestBackupList, expnode)
+          finally:
+            expnode.Release()
         finally:
-          qa_config.ReleaseNode(expnode)
+          instance.Release()
         del expnode
         del instance
       qa_cluster.AssertClusterVerify()
 
   finally:
-    qa_config.ReleaseNode(pnode)
+    pnode.Release()
 
   RunExclusiveStorageTests()
 
@@ -624,9 +657,9 @@ def RunQa():
         RunTest(qa_instance.TestRemoveInstanceOfflineNode, instance, snode,
                 set_offline, set_online)
       finally:
-        qa_config.ReleaseNode(pnode)
+        pnode.Release()
     finally:
-      qa_config.ReleaseNode(snode)
+      snode.Release()
     qa_cluster.AssertClusterVerify()
 
   RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)
@@ -643,14 +676,14 @@ def main():
   parser.add_option("--yes-do-it", dest="yes_do_it",
                     action="store_true",
                     help="Really execute the tests")
-  (qa_config.options, args) = parser.parse_args()
+  (opts, args) = parser.parse_args()
 
   if len(args) == 1:
     (config_file, ) = args
   else:
     parser.error("Wrong number of arguments.")
 
-  if not qa_config.options.yes_do_it:
+  if not opts.yes_do_it:
     print ("Executing this script irreversibly destroys any Ganeti\n"
            "configuration on all nodes involved. If you really want\n"
            "to start testing, supply the --yes-do-it option.")
@@ -658,7 +691,7 @@ def main():
 
   qa_config.Load(config_file)
 
-  primary = qa_config.GetMasterNode()["primary"]
+  primary = qa_config.GetMasterNode().primary
   qa_utils.StartMultiplexer(primary)
   print ("SSH command for primary node: %s" %
          utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
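
The recurring change throughout this file is that every acquired resource is
now released through its own object, with nested try/finally blocks ensuring
release even when a test fails. A condensed sketch of the idiom (hypothetical
test body):

    node = qa_config.AcquireNode()
    try:
      instance = qa_config.AcquireInstance()
      try:
        pass  # run tests against node/instance here
      finally:
        instance.Release()
    finally:
      node.Release()
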
index 501da9d..10b93a8 100644 (file)
@@ -33,7 +33,7 @@
   "# Default network interface parameters": null,
   "#default-nicparams": {
     "mode": "bridged",
-    "link": "xen-br0",
+    "link": "xen-br0"
   },
 
   "os": "debian-etch",
     ]
   },
 
+  "networks": {
+    "inexistent-networks": [
+      "network1",
+      "network2",
+      "network3"
+    ]
+  },
+
   "tests": {
     "# Whether tests are enabled or disabled by default": null,
     "default": true,
     "group-list": true,
     "group-rwops": true,
 
+    "network": false,
+
     "node-list": true,
     "node-info": true,
     "node-volumes": true,
index 83a713e..8bde8ee 100644 (file)
@@ -60,7 +60,7 @@ def _CheckFileOnAllNodes(filename, content):
   """
   cmd = utils.ShellQuoteArgs(["cat", filename])
   for node in qa_config.get("nodes"):
-    AssertEqual(qa_utils.GetCommandOutput(node["primary"], cmd), content)
+    AssertEqual(qa_utils.GetCommandOutput(node.primary, cmd), content)
 
 
 # "gnt-cluster info" fields
@@ -81,7 +81,7 @@ def _GetBoolClusterField(field):
   """
   master = qa_config.GetMasterNode()
   infocmd = "gnt-cluster info"
-  info_out = qa_utils.GetCommandOutput(master["primary"], infocmd)
+  info_out = qa_utils.GetCommandOutput(master.primary, infocmd)
   ret = None
   for l in info_out.splitlines():
     m = _CIFIELD_RE.match(l)
@@ -123,7 +123,7 @@ def AssertClusterVerify(fail=False, errors=None):
   cvcmd = "gnt-cluster verify"
   mnode = qa_config.GetMasterNode()
   if errors:
-    cvout = GetCommandOutput(mnode["primary"], cvcmd + " --error-codes",
+    cvout = GetCommandOutput(mnode.primary, cvcmd + " --error-codes",
                              fail=True)
     actual = _GetCVErrorCodes(cvout)
     expected = compat.UniqueFrozenset(e for (_, e, _) in errors)
@@ -161,7 +161,7 @@ def TestClusterInit(rapi_user, rapi_secret):
     fh.write("%s %s write\n" % (rapi_user, rapi_secret))
     fh.flush()
 
-    tmpru = qa_utils.UploadFile(master["primary"], fh.name)
+    tmpru = qa_utils.UploadFile(master.primary, fh.name)
     try:
       AssertCommand(["mkdir", "-p", rapi_dir])
       AssertCommand(["mv", tmpru, pathutils.RAPI_USERS_FILE])
@@ -185,8 +185,8 @@ def TestClusterInit(rapi_user, rapi_secret):
       if spec:
         cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, spec))
 
-  if master.get("secondary", None):
-    cmd.append("--secondary-ip=%s" % master["secondary"])
+  if master.secondary:
+    cmd.append("--secondary-ip=%s" % master.secondary)
 
   vgname = qa_config.get("vg-name", None)
   if vgname:
@@ -299,7 +299,7 @@ def TestClusterEpo():
   master = qa_config.GetMasterNode()
 
   # Assert that OOB is unavailable for all nodes
-  result_output = GetCommandOutput(master["primary"],
+  result_output = GetCommandOutput(master.primary,
                                    "gnt-node list --verbose --no-headers -o"
                                    " powered")
   AssertEqual(compat.all(powered == "(unavail)"
@@ -311,13 +311,13 @@ def TestClusterEpo():
   AssertCommand(["gnt-cluster", "epo", "--all", "some_arg"], fail=True)
 
   # Unless --all is given master is not allowed to be in the list
-  AssertCommand(["gnt-cluster", "epo", "-f", master["primary"]], fail=True)
+  AssertCommand(["gnt-cluster", "epo", "-f", master.primary], fail=True)
 
   # This shouldn't fail
   AssertCommand(["gnt-cluster", "epo", "-f", "--all"])
 
   # All instances should have been stopped now
-  result_output = GetCommandOutput(master["primary"],
+  result_output = GetCommandOutput(master.primary,
                                    "gnt-instance list --no-headers -o status")
   # ERROR_down because the instance is stopped but not recorded as such
   AssertEqual(compat.all(status == "ERROR_down"
@@ -327,7 +327,7 @@ def TestClusterEpo():
   AssertCommand(["gnt-cluster", "epo", "--on", "-f", "--all"])
 
   # All instances should have been started now
-  result_output = GetCommandOutput(master["primary"],
+  result_output = GetCommandOutput(master.primary,
                                    "gnt-instance list --no-headers -o status")
   AssertEqual(compat.all(status == "running"
                          for status in result_output.splitlines()), True)
@@ -349,7 +349,7 @@ def TestDelay(node):
   AssertCommand(["gnt-debug", "delay", "1"])
   AssertCommand(["gnt-debug", "delay", "--no-master", "1"])
   AssertCommand(["gnt-debug", "delay", "--no-master",
-                 "-n", node["primary"], "1"])
+                 "-n", node.primary, "1"])
 
 
 def TestClusterReservedLvs():
@@ -462,7 +462,7 @@ def TestClusterRenewCrypto():
          "--rapi-certificate=/dev/null"]
   AssertCommand(cmd, fail=True)
 
-  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
+  rapi_cert_backup = qa_utils.BackupFile(master.primary,
                                          pathutils.RAPI_CERT_FILE)
   try:
     # Custom RAPI certificate
@@ -473,7 +473,7 @@ def TestClusterRenewCrypto():
 
     utils.GenerateSelfSignedSslCert(fh.name, validity=validity)
 
-    tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
+    tmpcert = qa_utils.UploadFile(master.primary, fh.name)
     try:
       AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                      "--rapi-certificate=%s" % tmpcert])
@@ -486,7 +486,7 @@ def TestClusterRenewCrypto():
     cds_fh.write("\n")
     cds_fh.flush()
 
-    tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
+    tmpcds = qa_utils.UploadFile(master.primary, cds_fh.name)
     try:
       AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                      "--cluster-domain-secret=%s" % tmpcds])
@@ -510,7 +510,7 @@ def TestClusterBurnin():
   master = qa_config.GetMasterNode()
 
   options = qa_config.get("options", {})
-  disk_template = options.get("burnin-disk-template", "drbd")
+  disk_template = options.get("burnin-disk-template", constants.DT_DRBD8)
   parallel = options.get("burnin-in-parallel", False)
   check_inst = options.get("burnin-check-instances", False)
   do_rename = options.get("burnin-rename", "")
@@ -530,7 +530,7 @@ def TestClusterBurnin():
     if len(instances) < 1:
       raise qa_error.Error("Burnin needs at least one instance")
 
-    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
+    script = qa_utils.UploadFile(master.primary, "../tools/burnin")
     try:
       # Run burnin
       cmd = [script,
@@ -551,14 +551,14 @@ def TestClusterBurnin():
         cmd.append("--no-reboot")
       else:
         cmd.append("--reboot-types=%s" % ",".join(reboot_types))
-      cmd += [inst["name"] for inst in instances]
+      cmd += [inst.name for inst in instances]
       AssertCommand(cmd)
     finally:
       AssertCommand(["rm", "-f", script])
 
   finally:
     for inst in instances:
-      qa_config.ReleaseInstance(inst)
+      inst.Release()
 
 
 def TestClusterMasterFailover():
@@ -572,7 +572,7 @@ def TestClusterMasterFailover():
     # Back to original master node
     AssertCommand(cmd, node=master)
   finally:
-    qa_config.ReleaseNode(failovermaster)
+    failovermaster.Release()
 
 
 def TestClusterMasterFailoverWithDrainedQueue():
@@ -599,7 +599,7 @@ def TestClusterMasterFailoverWithDrainedQueue():
     # Back to original master node
     AssertCommand(cmd, node=master)
   finally:
-    qa_config.ReleaseNode(failovermaster)
+    failovermaster.Release()
 
   AssertCommand(drain_check, fail=True)
   AssertCommand(drain_check, node=failovermaster, fail=True)
@@ -618,7 +618,7 @@ def TestClusterCopyfile():
   f.seek(0)
 
   # Upload file to master node
-  testname = qa_utils.UploadFile(master["primary"], f.name)
+  testname = qa_utils.UploadFile(master.primary, f.name)
   try:
     # Copy file to all nodes
     AssertCommand(["gnt-cluster", "copyfile", testname])
@@ -679,7 +679,7 @@ def TestExclStorSharedPv(node):
   vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
   lvname1 = _QA_LV_PREFIX + "vol1"
   lvname2 = _QA_LV_PREFIX + "vol2"
-  node_name = node["primary"]
+  node_name = node.primary
   AssertCommand(["lvcreate", "-L1G", "-n", lvname1, vgname], node=node_name)
   AssertClusterVerify(fail=True, errors=[constants.CV_ENODEORPHANLV])
   AssertCommand(["lvcreate", "-L1G", "-n", lvname2, vgname], node=node_name)
index 6c4e3f5..6b0c5e1 100644 (file)
@@ -35,54 +35,334 @@ import qa_error
 
 _INSTANCE_CHECK_KEY = "instance-check"
 _ENABLED_HV_KEY = "enabled-hypervisors"
-# Key to store the cluster-wide run-time value of the exclusive storage flag
-_EXCLUSIVE_STORAGE_KEY = "_exclusive_storage"
 
+#: QA configuration (L{_QaConfig})
+_config = None
 
-cfg = {}
-options = None
 
+class _QaInstance(object):
+  __slots__ = [
+    "name",
+    "nicmac",
+    "used",
+    "_disk_template",
+    ]
 
-def Load(path):
-  """Loads the passed configuration file.
+  def __init__(self, name, nicmac):
+    """Initializes instances of this class.
+
+    """
+    self.name = name
+    self.nicmac = nicmac
+    self.used = None
+    self._disk_template = None
+
+  @classmethod
+  def FromDict(cls, data):
+    """Creates instance object from JSON dictionary.
+
+    """
+    nicmac = []
+
+    macaddr = data.get("nic.mac/0")
+    if macaddr:
+      nicmac.append(macaddr)
+
+    return cls(name=data["name"], nicmac=nicmac)
+
+  def Release(self):
+    """Releases instance and makes it available again.
+
+    """
+    assert self.used, \
+      ("Instance '%s' was never acquired or released more than once" %
+       self.name)
+
+    self.used = False
+    self._disk_template = None
+
+  def GetNicMacAddr(self, idx, default):
+    """Returns MAC address for NIC.
+
+    @type idx: int
+    @param idx: NIC index
+    @param default: Default value
+
+    """
+    if len(self.nicmac) > idx:
+      return self.nicmac[idx]
+    else:
+      return default
+
+  def SetDiskTemplate(self, template):
+    """Set the disk template.
+
+    """
+    assert template in constants.DISK_TEMPLATES
+
+    self._disk_template = template
+
+  @property
+  def disk_template(self):
+    """Returns the current disk template.
+
+    """
+    return self._disk_template
+
+
+class _QaNode(object):
+  __slots__ = [
+    "primary",
+    "secondary",
+    "_added",
+    "_use_count",
+    ]
+
+  def __init__(self, primary, secondary):
+    """Initializes instances of this class.
+
+    """
+    self.primary = primary
+    self.secondary = secondary
+    self._added = False
+    self._use_count = 0
+
+  @classmethod
+  def FromDict(cls, data):
+    """Creates node object from JSON dictionary.
+
+    """
+    return cls(primary=data["primary"], secondary=data.get("secondary"))
+
+  def Use(self):
+    """Marks a node as being in use.
+
+    """
+    assert self._use_count >= 0
+
+    self._use_count += 1
+
+    return self
+
+  def Release(self):
+    """Release a node (opposite of L{Use}).
+
+    """
+    assert self.use_count > 0
+
+    self._use_count -= 1
+
+  def MarkAdded(self):
+    """Marks node as having been added to a cluster.
+
+    """
+    assert not self._added
+    self._added = True
+
+  def MarkRemoved(self):
+    """Marks node as having been removed from a cluster.
+
+    """
+    assert self._added
+    self._added = False
+
+  @property
+  def added(self):
+    """Returns whether a node is part of a cluster.
+
+    """
+    return self._added
+
+  @property
+  def use_count(self):
+    """Returns number of current uses (controlled by L{Use} and L{Release}).
+
+    """
+    return self._use_count
+
+
+_RESOURCE_CONVERTER = {
+  "instances": _QaInstance.FromDict,
+  "nodes": _QaNode.FromDict,
+  }
+
+
+def _ConvertResources((key, value)):
+  """Converts cluster resources in configuration to Python objects.
 
   """
-  global cfg # pylint: disable=W0603
+  fn = _RESOURCE_CONVERTER.get(key, None)
+  if fn:
+    return (key, map(fn, value))
+  else:
+    return (key, value)
+
+
+class _QaConfig(object):
+  def __init__(self, data):
+    """Initializes instances of this class.
+
+    """
+    self._data = data
+
+    #: Cluster-wide run-time value of the exclusive storage flag
+    self._exclusive_storage = None
+
+  @classmethod
+  def Load(cls, filename):
+    """Loads a configuration file and produces a configuration object.
+
+    @type filename: string
+    @param filename: Path to configuration file
+    @rtype: L{_QaConfig}
+
+    """
+    data = serializer.LoadJson(utils.ReadFile(filename))
+
+    result = cls(dict(map(_ConvertResources,
+                          data.items()))) # pylint: disable=E1103
+    result.Validate()
+
+    return result
+
+  def Validate(self):
+    """Validates loaded configuration data.
 
-  cfg = serializer.LoadJson(utils.ReadFile(path))
+    """
+    if not self.get("nodes"):
+      raise qa_error.Error("Need at least one node")
+
+    if not self.get("instances"):
+      raise qa_error.Error("Need at least one instance")
+
+    if (self.get("disk") is None or
+        self.get("disk-growth") is None or
+        len(self.get("disk")) != len(self.get("disk-growth"))):
+      raise qa_error.Error("Config options 'disk' and 'disk-growth' must exist"
+                           " and have the same number of items")
+
+    check = self.GetInstanceCheckScript()
+    if check:
+      try:
+        os.stat(check)
+      except EnvironmentError, err:
+        raise qa_error.Error("Can't find instance check script '%s': %s" %
+                             (check, err))
+
+    enabled_hv = frozenset(self.GetEnabledHypervisors())
+    if not enabled_hv:
+      raise qa_error.Error("No hypervisor is enabled")
+
+    difference = enabled_hv - constants.HYPER_TYPES
+    if difference:
+      raise qa_error.Error("Unknown hypervisor(s) enabled: %s" %
+                           utils.CommaJoin(difference))
+
+  def __getitem__(self, name):
+    """Returns configuration value.
+
+    @type name: string
+    @param name: Name of configuration entry
+
+    """
+    return self._data[name]
+
+  def get(self, name, default=None):
+    """Returns configuration value.
+
+    @type name: string
+    @param name: Name of configuration entry
+    @param default: Default value
+
+    """
+    return self._data.get(name, default)
+
+  def GetMasterNode(self):
+    """Returns the default master node for the cluster.
+
+    """
+    return self["nodes"][0]
+
+  def GetInstanceCheckScript(self):
+    """Returns path to instance check script or C{None}.
 
-  Validate()
+    """
+    return self._data.get(_INSTANCE_CHECK_KEY, None)
 
+  def GetEnabledHypervisors(self):
+    """Returns list of enabled hypervisors.
 
-def Validate():
-  if len(cfg["nodes"]) < 1:
-    raise qa_error.Error("Need at least one node")
-  if len(cfg["instances"]) < 1:
-    raise qa_error.Error("Need at least one instance")
-  if len(cfg["disk"]) != len(cfg["disk-growth"]):
-    raise qa_error.Error("Config options 'disk' and 'disk-growth' must have"
-                         " the same number of items")
+    @rtype: list
 
-  check = GetInstanceCheckScript()
-  if check:
+    """
     try:
-      os.stat(check)
-    except EnvironmentError, err:
-      raise qa_error.Error("Can't find instance check script '%s': %s" %
-                           (check, err))
+      value = self._data[_ENABLED_HV_KEY]
+    except KeyError:
+      return [constants.DEFAULT_ENABLED_HYPERVISOR]
+    else:
+      if value is None:
+        return []
+      elif isinstance(value, basestring):
+        # The configuration key ("enabled-hypervisors") implies there can be
+        # multiple values. Multiple hypervisors are comma-separated on the
+        # command line option to "gnt-cluster init", so we need to handle them
+        # equally here.
+        return value.split(",")
+      else:
+        return value
+
+  def GetDefaultHypervisor(self):
+    """Returns the default hypervisor to be used.
+
+    """
+    return self.GetEnabledHypervisors()[0]
+
+  def SetExclusiveStorage(self, value):
+    """Set the expected value of the C{exclusive_storage} flag for the cluster.
+
+    """
+    self._exclusive_storage = bool(value)
+
+  def GetExclusiveStorage(self):
+    """Get the expected value of the C{exclusive_storage} flag for the cluster.
+
+    """
+    value = self._exclusive_storage
+    assert value is not None
+    return value
+
+  def IsTemplateSupported(self, templ):
+    """Is the given disk template supported by the current configuration?
+
+    """
+    return (not self.GetExclusiveStorage() or
+            templ in constants.DTS_EXCL_STORAGE)
+
+
+def Load(path):
+  """Loads the passed configuration file.
+
+  """
+  global _config # pylint: disable=W0603
 
-  enabled_hv = frozenset(GetEnabledHypervisors())
-  if not enabled_hv:
-    raise qa_error.Error("No hypervisor is enabled")
+  _config = _QaConfig.Load(path)
 
-  difference = enabled_hv - constants.HYPER_TYPES
-  if difference:
-    raise qa_error.Error("Unknown hypervisor(s) enabled: %s" %
-                         utils.CommaJoin(difference))
+
+def GetConfig():
+  """Returns the configuration object.
+
+  """
+  if _config is None:
+    raise RuntimeError("Configuration not yet loaded")
+
+  return _config
 
 
 def get(name, default=None):
-  return cfg.get(name, default)
+  """Wrapper for L{_QaConfig.get}.
+
+  """
+  return GetConfig().get(name, default=default)
 
 
 class Either:
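
With the module-level singleton in place, callers either use the wrapper
functions below or fetch the configuration object directly. A short usage
sketch (hypothetical file name; the "kvm,xen-pvm" value illustrates the
comma-splitting documented in GetEnabledHypervisors):

    qa_config.Load("qa-sample.json")   # hypothetical path
    cfg = qa_config.GetConfig()
    master = cfg.GetMasterNode()
    # With an "enabled-hypervisors" entry of "kvm,xen-pvm":
    assert cfg.GetEnabledHypervisors() == ["kvm", "xen-pvm"]
    assert cfg.GetDefaultHypervisor() == "kvm"
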
@@ -148,10 +428,12 @@ def TestEnabled(tests, _cfg=None):
 
   """
   if _cfg is None:
-    _cfg = cfg
+    cfg = GetConfig()
+  else:
+    cfg = _cfg
 
   # Get settings for all tests
-  cfg_tests = _cfg.get("tests", {})
+  cfg_tests = cfg.get("tests", {})
 
   # Get default setting
   default = cfg_tests.get("default", True)
@@ -160,121 +442,90 @@ def TestEnabled(tests, _cfg=None):
                            tests, compat.all)
 
 
-def GetInstanceCheckScript():
-  """Returns path to instance check script or C{None}.
+def GetInstanceCheckScript(*args):
+  """Wrapper for L{_QaConfig.GetInstanceCheckScript}.
 
   """
-  return cfg.get(_INSTANCE_CHECK_KEY, None)
-
+  return GetConfig().GetInstanceCheckScript(*args)
 
-def GetEnabledHypervisors():
-  """Returns list of enabled hypervisors.
 
-  @rtype: list
+def GetEnabledHypervisors(*args):
+  """Wrapper for L{_QaConfig.GetEnabledHypervisors}.
 
   """
-  try:
-    value = cfg[_ENABLED_HV_KEY]
-  except KeyError:
-    return [constants.DEFAULT_ENABLED_HYPERVISOR]
-  else:
-    if isinstance(value, basestring):
-      # The configuration key ("enabled-hypervisors") implies there can be
-      # multiple values. Multiple hypervisors are comma-separated on the
-      # command line option to "gnt-cluster init", so we need to handle them
-      # equally here.
-      return value.split(",")
-    else:
-      return value
+  return GetConfig().GetEnabledHypervisors(*args)
 
 
-def GetDefaultHypervisor():
-  """Returns the default hypervisor to be used.
+def GetDefaultHypervisor(*args):
+  """Wrapper for L{_QaConfig.GetDefaultHypervisor}.
 
   """
-  return GetEnabledHypervisors()[0]
+  return GetConfig().GetDefaultHypervisor(*args)
 
 
-def GetInstanceNicMac(inst, default=None):
-  """Returns MAC address for instance's network interface.
+def GetMasterNode():
+  """Wrapper for L{_QaConfig.GetMasterNode}.
 
   """
-  return inst.get("nic.mac/0", default)
-
-
-def GetMasterNode():
-  return cfg["nodes"][0]
+  return GetConfig().GetMasterNode()
 
 
-def AcquireInstance():
+def AcquireInstance(_cfg=None):
   """Returns an instance which isn't in use.
 
   """
+  if _cfg is None:
+    cfg = GetConfig()
+  else:
+    cfg = _cfg
+
   # Filter out unwanted instances
-  tmp_flt = lambda inst: not inst.get("_used", False)
-  instances = filter(tmp_flt, cfg["instances"])
-  del tmp_flt
+  instances = filter(lambda inst: not inst.used, cfg["instances"])
 
-  if len(instances) == 0:
+  if not instances:
     raise qa_error.OutOfInstancesError("No instances left")
 
   inst = instances[0]
-  inst["_used"] = True
-  inst["_template"] = None
-  return inst
-
-
-def ReleaseInstance(inst):
-  inst["_used"] = False
-
-
-def GetInstanceTemplate(inst):
-  """Return the disk template of an instance.
-
-  """
-  templ = inst["_template"]
-  assert templ is not None
-  return templ
 
+  assert not inst.used
+  assert inst.disk_template is None
 
-def SetInstanceTemplate(inst, template):
-  """Set the disk template for an instance.
+  inst.used = True
 
-  """
-  inst["_template"] = template
+  return inst
 
 
 def SetExclusiveStorage(value):
-  """Set the expected value of the exclusive_storage flag for the cluster.
+  """Wrapper for L{_QaConfig.SetExclusiveStorage}.
 
   """
-  cfg[_EXCLUSIVE_STORAGE_KEY] = bool(value)
+  return GetConfig().SetExclusiveStorage(value)
 
 
 def GetExclusiveStorage():
-  """Get the expected value of the exclusive_storage flag for the cluster.
+  """Wrapper for L{_QaConfig.GetExclusiveStorage}.
 
   """
-  val = cfg.get(_EXCLUSIVE_STORAGE_KEY)
-  assert val is not None
-  return val
+  return GetConfig().GetExclusiveStorage()
 
 
 def IsTemplateSupported(templ):
-  """Is the given templated supported by the current configuration?
+  """Wrapper for L{_QaConfig.GetExclusiveStorage}.
 
   """
-  if GetExclusiveStorage():
-    return templ in constants.DTS_EXCL_STORAGE
-  else:
-    return True
+  return GetConfig().IsTemplateSupported(templ)
 
 
-def AcquireNode(exclude=None):
+def AcquireNode(exclude=None, _cfg=None):
   """Returns the least used node.
 
   """
-  master = GetMasterNode()
+  if _cfg is None:
+    cfg = GetConfig()
+  else:
+    cfg = _cfg
+
+  master = cfg.GetMasterNode()
 
   # Filter out unwanted nodes
   # TODO: Maybe combine filters
@@ -285,25 +536,22 @@ def AcquireNode(exclude=None):
   else:
     nodes = filter(lambda node: node != exclude, cfg["nodes"])
 
-  tmp_flt = lambda node: node.get("_added", False) or node == master
-  nodes = filter(tmp_flt, nodes)
-  del tmp_flt
+  nodes = filter(lambda node: node.added or node == master, nodes)
 
-  if len(nodes) == 0:
+  if not nodes:
     raise qa_error.OutOfNodesError("No nodes left")
 
   # Get node with least number of uses
+  # TODO: Switch to computing sort key instead of comparing directly
   def compare(a, b):
-    result = cmp(a.get("_count", 0), b.get("_count", 0))
+    result = cmp(a.use_count, b.use_count)
     if result == 0:
-      result = cmp(a["primary"], b["primary"])
+      result = cmp(a.primary, b.primary)
     return result
 
   nodes.sort(cmp=compare)
 
-  node = nodes[0]
-  node["_count"] = node.get("_count", 0) + 1
-  return node
+  return nodes[0].Use()
 
 
 def AcquireManyNodes(num, exclude=None):
@@ -337,10 +585,6 @@ def AcquireManyNodes(num, exclude=None):
   return nodes
 
 
-def ReleaseNode(node):
-  node["_count"] = node.get("_count", 0) - 1
-
-
 def ReleaseManyNodes(nodes):
-  for n in nodes:
-    ReleaseNode(n)
+  for node in nodes:
+    node.Release()
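
ReleaseManyNodes mirrors AcquireManyNodes; a typical pairing looks like this
(sketch only):

    inodes = qa_config.AcquireManyNodes(2)
    try:
      pass  # e.g. create a DRBD instance across the two nodes
    finally:
      qa_config.ReleaseManyNodes(inodes)
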
index 6d2456c..68eeea3 100644 (file)
@@ -45,7 +45,7 @@ def _InstanceRunning(name):
 
   cmd = (utils.ShellQuoteArgs(["gnt-instance", "list", "-o", "status", name]) +
          ' | grep running')
-  ret = StartSSH(master["primary"], cmd).wait()
+  ret = StartSSH(master.primary, cmd).wait()
   return ret == 0
 
 
@@ -87,7 +87,7 @@ def TestPauseWatcher():
   AssertCommand(["gnt-cluster", "watcher", "pause", "4h"])
 
   cmd = ["gnt-cluster", "watcher", "info"]
-  output = GetCommandOutput(master["primary"],
+  output = GetCommandOutput(master.primary,
                             utils.ShellQuoteArgs(cmd))
   AssertMatch(output, r"^.*\bis paused\b.*")
 
@@ -101,7 +101,7 @@ def TestResumeWatcher():
   AssertCommand(["gnt-cluster", "watcher", "continue"])
 
   cmd = ["gnt-cluster", "watcher", "info"]
-  output = GetCommandOutput(master["primary"],
+  output = GetCommandOutput(master.primary,
                             utils.ShellQuoteArgs(cmd))
   AssertMatch(output, r"^.*\bis not paused\b.*")
 
@@ -110,7 +110,7 @@ def TestInstanceAutomaticRestart(instance):
   """Test automatic restart of instance by ganeti-watcher.
 
   """
-  inst_name = qa_utils.ResolveInstanceName(instance["name"])
+  inst_name = qa_utils.ResolveInstanceName(instance.name)
 
   _ResetWatcherDaemon()
   _ShutdownInstance(inst_name)
@@ -128,7 +128,7 @@ def TestInstanceConsecutiveFailures(instance):
   """Test five consecutive instance failures.
 
   """
-  inst_name = qa_utils.ResolveInstanceName(instance["name"])
+  inst_name = qa_utils.ResolveInstanceName(instance.name)
 
   _ResetWatcherDaemon()
 
index 1831366..bb8ab33 100644 (file)
@@ -75,9 +75,9 @@ def TestIcmpPing():
   pricmd = [pingprimary, "-e"]
   seccmd = [pingsecondary, "-e"]
   for i in nodes:
-    pricmd.append(i["primary"])
-    if "secondary" in i:
-      seccmd.append(i["secondary"])
+    pricmd.append(i.primary)
+    if i.secondary:
+      seccmd.append(i.secondary)
 
   pristr = utils.ShellQuoteArgs(pricmd)
   if seccmd:
index 2e872ba..22f9f72 100644 (file)
@@ -128,7 +128,7 @@ def TestAssignNodesIncludingSplit(orig_group, node1, node2):
 
   (other_group, ) = qa_utils.GetNonexistentGroups(1)
 
-  master_node = qa_config.GetMasterNode()["primary"]
+  master_node = qa_config.GetMasterNode().primary
 
   def AssertInGroup(group, nodes):
     real_output = GetCommandOutput(master_node,
index bcc464a..8afe3ec 100644 (file)
@@ -56,7 +56,8 @@ def _GetGenericAddParameters(inst, force_mac=None):
   if force_mac:
     nic0_mac = force_mac
   else:
-    nic0_mac = qa_config.GetInstanceNicMac(inst)
+    nic0_mac = inst.GetNicMacAddr(0, None)
+
   if nic0_mac:
     params.extend(["--net", "0:mac=%s" % nic0_mac])
 
@@ -71,16 +72,16 @@ def _DiskTest(node, disk_template):
             "--disk-template=%s" % disk_template,
             "--node=%s" % node] +
            _GetGenericAddParameters(instance))
-    cmd.append(instance["name"])
+    cmd.append(instance.name)
 
     AssertCommand(cmd)
 
-    _CheckSsconfInstanceList(instance["name"])
-    qa_config.SetInstanceTemplate(instance, disk_template)
+    _CheckSsconfInstanceList(instance.name)
+    instance.SetDiskTemplate(disk_template)
 
     return instance
   except:
-    qa_config.ReleaseInstance(instance)
+    instance.Release()
     raise
 
 
@@ -99,7 +100,7 @@ def _GetInstanceInfo(instance):
   """
   master = qa_config.GetMasterNode()
   infocmd = utils.ShellQuoteArgs(["gnt-instance", "info", instance])
-  info_out = qa_utils.GetCommandOutput(master["primary"], infocmd)
+  info_out = qa_utils.GetCommandOutput(master.primary, infocmd)
   re_node = re.compile(r"^\s+-\s+(?:primary|secondaries):\s+(\S.+)$")
   node_elem = r"([^,()]+)(?:\s+\([^)]+\))?"
   # re_nodelist matches a list of nodes returned by gnt-instance info, e.g.:
@@ -147,7 +148,7 @@ def _DestroyInstanceVolumes(instance):
   @param instance: the instance
 
   """
-  info = _GetInstanceInfo(instance["name"])
+  info = _GetInstanceInfo(instance.name)
   vols = info["volumes"]
   for node in info["nodes"]:
     AssertCommand(["lvremove", "-f"] + vols, node=node)
@@ -165,7 +166,7 @@ def _GetBoolInstanceField(instance, field):
   master = qa_config.GetMasterNode()
   infocmd = utils.ShellQuoteArgs(["gnt-instance", "list", "--no-headers",
                                   "-o", field, instance])
-  info_out = qa_utils.GetCommandOutput(master["primary"], infocmd).strip()
+  info_out = qa_utils.GetCommandOutput(master.primary, infocmd).strip()
   if info_out == "Y":
     return True
   elif info_out == "N":
@@ -176,53 +177,48 @@ def _GetBoolInstanceField(instance, field):
 
 
 def IsFailoverSupported(instance):
-  templ = qa_config.GetInstanceTemplate(instance)
-  return templ in constants.DTS_MIRRORED
+  return instance.disk_template in constants.DTS_MIRRORED
 
 
 def IsMigrationSupported(instance):
-  templ = qa_config.GetInstanceTemplate(instance)
-  return templ in constants.DTS_MIRRORED
+  return instance.disk_template in constants.DTS_MIRRORED
 
 
 def IsDiskReplacingSupported(instance):
-  templ = qa_config.GetInstanceTemplate(instance)
-  return templ == constants.DT_DRBD8
+  return instance.disk_template == constants.DT_DRBD8
 
 
 @InstanceCheck(None, INST_UP, RETURN_VALUE)
 def TestInstanceAddWithPlainDisk(nodes):
   """gnt-instance add -t plain"""
   assert len(nodes) == 1
-  return _DiskTest(nodes[0]["primary"], "plain")
+  return _DiskTest(nodes[0].primary, constants.DT_PLAIN)
 
 
 @InstanceCheck(None, INST_UP, RETURN_VALUE)
 def TestInstanceAddWithDrbdDisk(nodes):
   """gnt-instance add -t drbd"""
   assert len(nodes) == 2
-  return _DiskTest(":".join(map(operator.itemgetter("primary"), nodes)),
-                   "drbd")
+  return _DiskTest(":".join(map(operator.attrgetter("primary"), nodes)),
+                   constants.DT_DRBD8)
 
 
 @InstanceCheck(None, INST_DOWN, FIRST_ARG)
 def TestInstanceRemove(instance):
   """gnt-instance remove"""
-  AssertCommand(["gnt-instance", "remove", "-f", instance["name"]])
-
-  qa_config.ReleaseInstance(instance)
+  AssertCommand(["gnt-instance", "remove", "-f", instance.name])
 
 
 @InstanceCheck(INST_DOWN, INST_UP, FIRST_ARG)
 def TestInstanceStartup(instance):
   """gnt-instance startup"""
-  AssertCommand(["gnt-instance", "startup", instance["name"]])
+  AssertCommand(["gnt-instance", "startup", instance.name])
 
 
 @InstanceCheck(INST_UP, INST_DOWN, FIRST_ARG)
 def TestInstanceShutdown(instance):
   """gnt-instance shutdown"""
-  AssertCommand(["gnt-instance", "shutdown", instance["name"]])
+  AssertCommand(["gnt-instance", "shutdown", instance.name])
 
 
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
@@ -230,7 +226,7 @@ def TestInstanceReboot(instance):
   """gnt-instance reboot"""
   options = qa_config.get("options", {})
   reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)
-  name = instance["name"]
+  name = instance.name
   for rtype in reboot_types:
     AssertCommand(["gnt-instance", "reboot", "--type=%s" % rtype, name])
 
@@ -240,7 +236,7 @@ def TestInstanceReboot(instance):
 
   master = qa_config.GetMasterNode()
   cmd = ["gnt-instance", "list", "--no-headers", "-o", "status", name]
-  result_output = qa_utils.GetCommandOutput(master["primary"],
+  result_output = qa_utils.GetCommandOutput(master.primary,
                                             utils.ShellQuoteArgs(cmd))
   AssertEqual(result_output.strip(), constants.INSTST_RUNNING)
 
@@ -248,12 +244,12 @@ def TestInstanceReboot(instance):
 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
 def TestInstanceReinstall(instance):
   """gnt-instance reinstall"""
-  AssertCommand(["gnt-instance", "reinstall", "-f", instance["name"]])
+  AssertCommand(["gnt-instance", "reinstall", "-f", instance.name])
 
   # Test with non-existent OS definition
   AssertCommand(["gnt-instance", "reinstall", "-f",
                  "--os-type=NonExistantOsForQa",
-                 instance["name"]],
+                 instance.name],
                 fail=True)
 
 
@@ -266,7 +262,7 @@ def _ReadSsconfInstanceList():
   cmd = ["cat", utils.PathJoin(pathutils.DATA_DIR,
                                "ssconf_%s" % constants.SS_INSTANCE_LIST)]
 
-  return qa_utils.GetCommandOutput(master["primary"],
+  return qa_utils.GetCommandOutput(master.primary,
                                    utils.ShellQuoteArgs(cmd)).splitlines()
 
 
@@ -338,7 +334,7 @@ def TestInstanceFailover(instance):
                               " test")
     return
 
-  cmd = ["gnt-instance", "failover", "--force", instance["name"]]
+  cmd = ["gnt-instance", "failover", "--force", instance.name]
 
   # failover ...
   AssertCommand(cmd)
@@ -356,10 +352,10 @@ def TestInstanceMigrate(instance, toggle_always_failover=True):
                               " test")
     return
 
-  cmd = ["gnt-instance", "migrate", "--force", instance["name"]]
+  cmd = ["gnt-instance", "migrate", "--force", instance.name]
   af_par = constants.BE_ALWAYS_FAILOVER
   af_field = "be/" + constants.BE_ALWAYS_FAILOVER
-  af_init_val = _GetBoolInstanceField(instance["name"], af_field)
+  af_init_val = _GetBoolInstanceField(instance.name, af_field)
 
   # migrate ...
   AssertCommand(cmd)
@@ -370,21 +366,21 @@ def TestInstanceMigrate(instance, toggle_always_failover=True):
   if toggle_always_failover:
     AssertCommand(["gnt-instance", "modify", "-B",
                    ("%s=%s" % (af_par, not af_init_val)),
-                   instance["name"]])
+                   instance.name])
   AssertCommand(cmd)
   # TODO: Verify the choice between failover and migration
   qa_utils.RunInstanceCheck(instance, True)
   if toggle_always_failover:
     AssertCommand(["gnt-instance", "modify", "-B",
-                   ("%s=%s" % (af_par, af_init_val)), instance["name"]])
+                   ("%s=%s" % (af_par, af_init_val)), instance.name])
 
   # TODO: Split into multiple tests
-  AssertCommand(["gnt-instance", "shutdown", instance["name"]])
+  AssertCommand(["gnt-instance", "shutdown", instance.name])
   qa_utils.RunInstanceCheck(instance, False)
   AssertCommand(cmd, fail=True)
   AssertCommand(["gnt-instance", "migrate", "--force", "--allow-failover",
-                 instance["name"]])
-  AssertCommand(["gnt-instance", "start", instance["name"]])
+                 instance.name])
+  AssertCommand(["gnt-instance", "start", instance.name])
   AssertCommand(cmd)
   # @InstanceCheck enforces the check that the instance is running
   qa_utils.RunInstanceCheck(instance, True)
@@ -392,7 +388,7 @@ def TestInstanceMigrate(instance, toggle_always_failover=True):
   AssertCommand(["gnt-instance", "modify", "-B",
                  ("%s=%s" %
                   (constants.BE_ALWAYS_FAILOVER, constants.VALUE_TRUE)),
-                 instance["name"]])
+                 instance.name])
 
   AssertCommand(cmd)
   qa_utils.RunInstanceCheck(instance, True)
@@ -402,7 +398,7 @@ def TestInstanceMigrate(instance, toggle_always_failover=True):
   AssertCommand(["gnt-instance", "modify", "-B",
                  ("%s=%s" %
                   (constants.BE_ALWAYS_FAILOVER, constants.VALUE_FALSE)),
-                 instance["name"]])
+                 instance.name])
 
   AssertCommand(cmd)
   qa_utils.RunInstanceCheck(instance, True)
@@ -410,7 +406,7 @@ def TestInstanceMigrate(instance, toggle_always_failover=True):
 
 def TestInstanceInfo(instance):
   """gnt-instance info"""
-  AssertCommand(["gnt-instance", "info", instance["name"]])
+  AssertCommand(["gnt-instance", "info", instance.name])
 
 
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
@@ -458,23 +454,23 @@ def TestInstanceModify(instance):
       ])
 
   for alist in args:
-    AssertCommand(["gnt-instance", "modify"] + alist + [instance["name"]])
+    AssertCommand(["gnt-instance", "modify"] + alist + [instance.name])
 
   # check no-modify
-  AssertCommand(["gnt-instance", "modify", instance["name"]], fail=True)
+  AssertCommand(["gnt-instance", "modify", instance.name], fail=True)
 
   # Marking offline while instance is running must fail...
-  AssertCommand(["gnt-instance", "modify", "--offline", instance["name"]],
+  AssertCommand(["gnt-instance", "modify", "--offline", instance.name],
                  fail=True)
 
   # ...while making it online is ok, and should work
-  AssertCommand(["gnt-instance", "modify", "--online", instance["name"]])
+  AssertCommand(["gnt-instance", "modify", "--online", instance.name])
 
 
 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
 def TestInstanceStoppedModify(instance):
   """gnt-instance modify (stopped instance)"""
-  name = instance["name"]
+  name = instance.name
 
   # Instance was not marked offline; try marking it online once more
   AssertCommand(["gnt-instance", "modify", "--online", name])
@@ -499,16 +495,18 @@ def TestInstanceStoppedModify(instance):
 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
 def TestInstanceConvertDiskToPlain(instance, inodes):
   """gnt-instance modify -t"""
-  name = instance["name"]
-  template = qa_config.GetInstanceTemplate(instance)
-  if template != "drbd":
+  name = instance.name
+
+  template = instance.disk_template
+  if template != constants.DT_DRBD8:
     print qa_utils.FormatInfo("Unsupported template %s, skipping conversion"
                               " test" % template)
     return
+
   assert len(inodes) == 2
-  AssertCommand(["gnt-instance", "modify", "-t", "plain", name])
-  AssertCommand(["gnt-instance", "modify", "-t", "drbd",
-                 "-n", inodes[1]["primary"], name])
+  AssertCommand(["gnt-instance", "modify", "-t", constants.DT_PLAIN, name])
+  AssertCommand(["gnt-instance", "modify", "-t", constants.DT_DRBD8,
+                 "-n", inodes[1].primary, name])
 
 
 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
@@ -517,7 +515,7 @@ def TestInstanceGrowDisk(instance):
   if qa_config.GetExclusiveStorage():
     print qa_utils.FormatInfo("Test not supported with exclusive_storage")
     return
-  name = instance["name"]
+  name = instance.name
   all_size = qa_config.get("disk")
   all_grow = qa_config.get("disk-growth")
   if not all_grow:
@@ -550,7 +548,7 @@ def TestInstanceListFields():
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
 def TestInstanceConsole(instance):
   """gnt-instance console"""
-  AssertCommand(["gnt-instance", "console", "--show-cmd", instance["name"]])
+  AssertCommand(["gnt-instance", "console", "--show-cmd", instance.name])
 
 
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
@@ -559,7 +557,7 @@ def TestReplaceDisks(instance, curr_nodes, other_nodes):
   def buildcmd(args):
     cmd = ["gnt-instance", "replace-disks"]
     cmd.extend(args)
-    cmd.append(instance["name"])
+    cmd.append(instance.name)
     return cmd
 
   if not IsDiskReplacingSupported(instance):
@@ -581,23 +579,23 @@ def TestReplaceDisks(instance, curr_nodes, other_nodes):
     # A placeholder; the actual command choice depends on use_ialloc
     None,
     # Restore the original secondary
-    ["--new-secondary=%s" % snode["primary"]],
+    ["--new-secondary=%s" % snode.primary],
     ]:
     if data is None:
       if use_ialloc:
         data = ["-I", constants.DEFAULT_IALLOCATOR_SHORTCUT]
       else:
-        data = ["--new-secondary=%s" % othernode["primary"]]
+        data = ["--new-secondary=%s" % othernode.primary]
     AssertCommand(buildcmd(data))
 
   AssertCommand(buildcmd(["-a"]))
-  AssertCommand(["gnt-instance", "stop", instance["name"]])
+  AssertCommand(["gnt-instance", "stop", instance.name])
   AssertCommand(buildcmd(["-a"]), fail=True)
-  AssertCommand(["gnt-instance", "activate-disks", instance["name"]])
+  AssertCommand(["gnt-instance", "activate-disks", instance.name])
   AssertCommand(["gnt-instance", "activate-disks", "--wait-for-sync",
-                 instance["name"]])
+                 instance.name])
   AssertCommand(buildcmd(["-a"]))
-  AssertCommand(["gnt-instance", "start", instance["name"]])
+  AssertCommand(["gnt-instance", "start", instance.name])
 
 
 def _AssertRecreateDisks(cmdargs, instance, fail=False, check=True,
@@ -614,13 +612,13 @@ def _AssertRecreateDisks(cmdargs, instance, fail=False, check=True,
   if destroy:
     _DestroyInstanceVolumes(instance)
   AssertCommand((["gnt-instance", "recreate-disks"] + cmdargs +
-                 [instance["name"]]), fail)
+                 [instance.name]), fail)
   if not fail and check:
     # Quick check that the disks are there
-    AssertCommand(["gnt-instance", "activate-disks", instance["name"]])
+    AssertCommand(["gnt-instance", "activate-disks", instance.name])
     AssertCommand(["gnt-instance", "activate-disks", "--wait-for-sync",
-                   instance["name"]])
-    AssertCommand(["gnt-instance", "deactivate-disks", instance["name"]])
+                   instance.name])
+    AssertCommand(["gnt-instance", "deactivate-disks", instance.name])
 
 
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
@@ -634,15 +632,15 @@ def TestRecreateDisks(instance, inodes, othernodes):
   """
   options = qa_config.get("options", {})
   use_ialloc = options.get("use-iallocators", True)
-  other_seq = ":".join([n["primary"] for n in othernodes])
-  orig_seq = ":".join([n["primary"] for n in inodes])
+  other_seq = ":".join([n.primary for n in othernodes])
+  orig_seq = ":".join([n.primary for n in inodes])
   # These fail because the instance is running
   _AssertRecreateDisks(["-n", other_seq], instance, fail=True, destroy=False)
   if use_ialloc:
     _AssertRecreateDisks(["-I", "hail"], instance, fail=True, destroy=False)
   else:
     _AssertRecreateDisks(["-n", other_seq], instance, fail=True, destroy=False)
-  AssertCommand(["gnt-instance", "stop", instance["name"]])
+  AssertCommand(["gnt-instance", "stop", instance.name])
   # Disks exist: this should fail
   _AssertRecreateDisks([], instance, fail=True, destroy=False)
   # Recreate disks in place
@@ -658,30 +656,29 @@ def TestRecreateDisks(instance, inodes, othernodes):
   # Move disks back
   _AssertRecreateDisks(["-n", orig_seq], instance, check=False)
   # This and InstanceCheck decoration check that the disks are working
-  AssertCommand(["gnt-instance", "reinstall", "-f", instance["name"]])
-  AssertCommand(["gnt-instance", "start", instance["name"]])
+  AssertCommand(["gnt-instance", "reinstall", "-f", instance.name])
+  AssertCommand(["gnt-instance", "start", instance.name])
 
 
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
 def TestInstanceExport(instance, node):
   """gnt-backup export -n ..."""
-  name = instance["name"]
-  AssertCommand(["gnt-backup", "export", "-n", node["primary"], name])
+  name = instance.name
+  AssertCommand(["gnt-backup", "export", "-n", node.primary, name])
   return qa_utils.ResolveInstanceName(name)
 
 
 @InstanceCheck(None, INST_DOWN, FIRST_ARG)
 def TestInstanceExportWithRemove(instance, node):
   """gnt-backup export --remove-instance"""
-  AssertCommand(["gnt-backup", "export", "-n", node["primary"],
-                 "--remove-instance", instance["name"]])
-  qa_config.ReleaseInstance(instance)
+  AssertCommand(["gnt-backup", "export", "-n", node.primary,
+                 "--remove-instance", instance.name])
 
 
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
 def TestInstanceExportNoTarget(instance):
   """gnt-backup export (without target node, should fail)"""
-  AssertCommand(["gnt-backup", "export", instance["name"]], fail=True)
+  AssertCommand(["gnt-backup", "export", instance.name], fail=True)
 
 
 @InstanceCheck(None, INST_DOWN, FIRST_ARG)
@@ -691,18 +688,18 @@ def TestInstanceImport(newinst, node, expnode, name):
   cmd = (["gnt-backup", "import",
           "--disk-template=%s" % templ,
           "--no-ip-check",
-          "--src-node=%s" % expnode["primary"],
+          "--src-node=%s" % expnode.primary,
           "--src-dir=%s/%s" % (pathutils.EXPORT_DIR, name),
-          "--node=%s" % node["primary"]] +
+          "--node=%s" % node.primary] +
          _GetGenericAddParameters(newinst, force_mac=constants.VALUE_GENERATE))
-  cmd.append(newinst["name"])
+  cmd.append(newinst.name)
   AssertCommand(cmd)
-  qa_config.SetInstanceTemplate(newinst, templ)
+  newinst.SetDiskTemplate(templ)
 
 
 def TestBackupList(expnode):
   """gnt-backup list"""
-  AssertCommand(["gnt-backup", "list", "--node=%s" % expnode["primary"]])
+  AssertCommand(["gnt-backup", "list", "--node=%s" % expnode.primary])
 
   qa_utils.GenericQueryTest("gnt-backup", query.EXPORT_FIELDS.keys(),
                             namefield=None, test_unknown=False)
@@ -722,13 +719,13 @@ def TestRemoveInstanceOfflineNode(instance, snode, set_offline, set_online):
   @param set_online: function to call to set the node on-line
 
   """
-  info = _GetInstanceInfo(instance["name"])
+  info = _GetInstanceInfo(instance.name)
   set_offline(snode)
   try:
     TestInstanceRemove(instance)
   finally:
     set_online(snode)
   # Clean up the disks on the offline node
-  for minor in info["drbd-minors"][snode["primary"]]:
+  for minor in info["drbd-minors"][snode.primary]:
     AssertCommand(["drbdsetup", str(minor), "down"], node=snode)
   AssertCommand(["lvremove", "-f"] + info["volumes"], node=snode)
diff --git a/qa/qa_network.py b/qa/qa_network.py
new file mode 100644 (file)
index 0000000..5648fd6
--- /dev/null
@@ -0,0 +1,81 @@
+#
+#
+
+# Copyright (C) 2013 Google Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+
+
+"""QA tests for networks.
+
+"""
+
+import qa_config
+import qa_utils
+
+from qa_utils import AssertCommand
+
+
+def GetNonexistentNetworks(count):
+  """Gets network names which shouldn't exist on the cluster.
+
+  @param count: Number of networks to get
+  @rtype: list
+
+  """
+  return qa_utils.GetNonexistentEntityNames(count, "networks", "network")
+
+
+def TestNetworkAddRemove():
+  """gnt-network add/remove"""
+  (network1, network2) = GetNonexistentNetworks(2)
+
+  # Add some networks of different sizes.
+  # Note: Using RFC5737 addresses.
+  AssertCommand(["gnt-network", "add", "--network", "192.0.2.0/30", network1])
+  AssertCommand(["gnt-network", "add", "--network", "198.51.100.0/24",
+                 network2])
+  # Try to add a network with an existing name.
+  AssertCommand(["gnt-network", "add", "--network", "203.0.133.0/24", network2],
+                fail=True)
+
+  AssertCommand(["gnt-network", "remove", network1])
+  AssertCommand(["gnt-network", "remove", network2])
+
+
+def TestNetworkConnect():
+  """gnt-network connect/disconnect"""
+  (group1, ) = qa_utils.GetNonexistentGroups(1)
+  (network1, ) = GetNonexistentNetworks(1)
+
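+  # Fall back to the standard NIC parameters (bridged mode on xen-br0)
+  # when the QA configuration doesn't override them.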
+  default_mode = "bridged"
+  default_link = "xen-br0"
+  nicparams = qa_config.get("default-nicparams")
+  if nicparams:
+    mode = nicparams.get("mode", default_mode)
+    link = nicparams.get("link", default_link)
+  else:
+    mode = default_mode
+    link = default_link
+
+  AssertCommand(["gnt-group", "add", group1])
+  AssertCommand(["gnt-network", "add", "--network", "192.0.2.0/24", network1])
+
+  AssertCommand(["gnt-network", "connect", network1, mode, link, group1])
+  AssertCommand(["gnt-network", "disconnect", network1, group1])
+
+  AssertCommand(["gnt-group", "remove", group1])
+  AssertCommand(["gnt-network", "remove", network1])
index ac7fc90..809f879 100644 (file)
@@ -36,32 +36,35 @@ from qa_utils import AssertCommand, AssertEqual
 
 
 def _NodeAdd(node, readd=False):
-  if not readd and node.get("_added", False):
-    raise qa_error.Error("Node %s already in cluster" % node["primary"])
-  elif readd and not node.get("_added", False):
-    raise qa_error.Error("Node %s not yet in cluster" % node["primary"])
+  if not readd and node.added:
+    raise qa_error.Error("Node %s already in cluster" % node.primary)
+  elif readd and not node.added:
+    raise qa_error.Error("Node %s not yet in cluster" % node.primary)
 
   cmd = ["gnt-node", "add", "--no-ssh-key-check"]
-  if node.get("secondary", None):
-    cmd.append("--secondary-ip=%s" % node["secondary"])
+  if node.secondary:
+    cmd.append("--secondary-ip=%s" % node.secondary)
   if readd:
     cmd.append("--readd")
-  cmd.append(node["primary"])
+  cmd.append(node.primary)
 
   AssertCommand(cmd)
 
-  node["_added"] = True
+  if readd:
+    assert node.added
+  else:
+    node.MarkAdded()
 
 
 def _NodeRemove(node):
-  AssertCommand(["gnt-node", "remove", node["primary"]])
-  node["_added"] = False
+  AssertCommand(["gnt-node", "remove", node.primary])
+  node.MarkRemoved()
 
 
 def MakeNodeOffline(node, value):
   """gnt-node modify --offline=value"""
   # value in ["yes", "no"]
-  AssertCommand(["gnt-node", "modify", "--offline", value, node["primary"]])
+  AssertCommand(["gnt-node", "modify", "--offline", value, node.primary])
 
 
 def TestNodeAddAll():
@@ -81,7 +84,7 @@ def MarkNodeAddedAll():
   master = qa_config.GetMasterNode()
   for node in qa_config.get("nodes"):
     if node != master:
-      node["_added"] = True
+      node.MarkAdded()
 
 
 def TestNodeRemoveAll():
@@ -125,7 +128,7 @@ def TestNodeStorage():
     cmd = ["gnt-node", "list-storage", "--storage-type", storage_type,
            "--output=node,name,allocatable", "--separator=|",
            "--no-headers"]
-    output = qa_utils.GetCommandOutput(master["primary"],
+    output = qa_utils.GetCommandOutput(master.primary,
                                        utils.ShellQuoteArgs(cmd))
 
     # Test with up to two devices
@@ -155,7 +158,7 @@ def TestNodeStorage():
         cmd = ["gnt-node", "list-storage", "--storage-type", storage_type,
                "--output=name,allocatable", "--separator=|",
                "--no-headers", node_name]
-        listout = qa_utils.GetCommandOutput(master["primary"],
+        listout = qa_utils.GetCommandOutput(master.primary,
                                             utils.ShellQuoteArgs(cmd))
         for line in listout.splitlines():
           (vfy_name, vfy_allocatable) = line.split("|")
@@ -179,10 +182,10 @@ def TestNodeFailover(node, node2):
                                      " it to have no primary instances.")
 
   # Fail over to secondary node
-  AssertCommand(["gnt-node", "failover", "-f", node["primary"]])
+  AssertCommand(["gnt-node", "failover", "-f", node.primary])
 
   # ... and back again.
-  AssertCommand(["gnt-node", "failover", "-f", node2["primary"]])
+  AssertCommand(["gnt-node", "failover", "-f", node2.primary])
 
 
 def TestNodeEvacuate(node, node2):
@@ -196,13 +199,13 @@ def TestNodeEvacuate(node, node2):
 
     # Evacuate all secondary instances
     AssertCommand(["gnt-node", "evacuate", "-f",
-                   "--new-secondary=%s" % node3["primary"], node2["primary"]])
+                   "--new-secondary=%s" % node3.primary, node2.primary])
 
     # ... and back again.
     AssertCommand(["gnt-node", "evacuate", "-f",
-                   "--new-secondary=%s" % node2["primary"], node3["primary"]])
+                   "--new-secondary=%s" % node2.primary, node3.primary])
   finally:
-    qa_config.ReleaseNode(node3)
+    node3.Release()
 
 
 def TestNodeModify(node):
@@ -210,23 +213,23 @@ def TestNodeModify(node):
   for flag in ["master-candidate", "drained", "offline"]:
     for value in ["yes", "no"]:
       AssertCommand(["gnt-node", "modify", "--force",
-                     "--%s=%s" % (flag, value), node["primary"]])
+                     "--%s=%s" % (flag, value), node.primary])
 
   AssertCommand(["gnt-node", "modify", "--master-candidate=yes",
-                 "--auto-promote", node["primary"]])
+                 "--auto-promote", node.primary])
 
   # Test setting secondary IP address
-  AssertCommand(["gnt-node", "modify", "--secondary-ip=%s" % node["secondary"],
-                 node["primary"]])
+  AssertCommand(["gnt-node", "modify", "--secondary-ip=%s" % node.secondary,
+                 node.primary])
 
 
 def _CreateOobScriptStructure():
   """Create a simple OOB handling script and its structure."""
   master = qa_config.GetMasterNode()
 
-  data_path = qa_utils.UploadData(master["primary"], "")
-  verify_path = qa_utils.UploadData(master["primary"], "")
-  exit_code_path = qa_utils.UploadData(master["primary"], "")
+  data_path = qa_utils.UploadData(master.primary, "")
+  verify_path = qa_utils.UploadData(master.primary, "")
+  exit_code_path = qa_utils.UploadData(master.primary, "")
 
   oob_script = (("#!/bin/bash\n"
                  "echo \"$@\" > %s\n"
@@ -234,7 +237,7 @@ def _CreateOobScriptStructure():
                  "exit $(< %s)\n") %
                 (utils.ShellQuote(verify_path), utils.ShellQuote(data_path),
                  utils.ShellQuote(exit_code_path)))
-  oob_path = qa_utils.UploadData(master["primary"], oob_script, mode=0700)
+  oob_path = qa_utils.UploadData(master.primary, oob_script, mode=0700)
 
   return [oob_path, verify_path, data_path, exit_code_path]
 
@@ -242,7 +245,7 @@ def _CreateOobScriptStructure():
 def _UpdateOobFile(path, data):
   """Updates the data file with data."""
   master = qa_config.GetMasterNode()
-  qa_utils.UploadData(master["primary"], data, filename=path)
+  qa_utils.UploadData(master.primary, data, filename=path)
 
 
 def _AssertOobCall(verify_path, expected_args):
@@ -250,7 +253,7 @@ def _AssertOobCall(verify_path, expected_args):
   master = qa_config.GetMasterNode()
 
   verify_output_cmd = utils.ShellQuoteArgs(["cat", verify_path])
-  output = qa_utils.GetCommandOutput(master["primary"], verify_output_cmd,
+  output = qa_utils.GetCommandOutput(master.primary, verify_output_cmd,
                                      tty=False)
 
   AssertEqual(expected_args, output.strip())
@@ -262,8 +265,8 @@ def TestOutOfBand():
 
   node = qa_config.AcquireNode(exclude=master)
 
-  master_name = master["primary"]
-  node_name = node["primary"]
+  master_name = master.primary
+  node_name = node.primary
   full_node_name = qa_utils.ResolveNodeName(node)
 
   (oob_path, verify_path,
@@ -388,10 +391,10 @@ def TestOutOfBand():
     AssertCommand(["gnt-node", "health"], fail=True)
 
     # Different OOB script for node
-    verify_path2 = qa_utils.UploadData(master["primary"], "")
+    verify_path2 = qa_utils.UploadData(master.primary, "")
     oob_script = ("#!/bin/sh\n"
                   "echo \"$@\" > %s\n") % verify_path2
-    oob_path2 = qa_utils.UploadData(master["primary"], oob_script, mode=0700)
+    oob_path2 = qa_utils.UploadData(master.primary, oob_script, mode=0700)
 
     try:
       AssertCommand(["gnt-node", "modify", "--node-parameters",
@@ -421,7 +424,7 @@ def TestNodeListFields():
 
 def TestNodeListDrbd(node):
   """gnt-node list-drbd"""
-  AssertCommand(["gnt-node", "list-drbd", node["primary"]])
+  AssertCommand(["gnt-node", "list-drbd", node.primary])
 
 
 def _BuildSetESCmd(action, value, node_name):
@@ -440,4 +443,4 @@ def TestExclStorSingleNode(node):
   """
   for action in ["add", "modify"]:
     for value in (True, False, "default"):
-      AssertCommand(_BuildSetESCmd(action, value, node["primary"]), fail=True)
+      AssertCommand(_BuildSetESCmd(action, value, node.primary), fail=True)
index b6724e0..f96e27e 100644 (file)
@@ -109,7 +109,7 @@ def _SetupTempOs(node, dirname, variant, valid):
   cmd = " && ".join(parts)
 
   print qa_utils.FormatInfo("Setting up %s with %s OS definition" %
-                            (node["primary"],
+                            (node.primary,
                              ["an invalid", "a valid"][int(valid)]))
 
   AssertCommand(cmd, node=node)
@@ -163,7 +163,7 @@ def _TestOs(mode, rapi_cb):
       AssertCommand(["gnt-os", "diagnose"], fail=(mode != _ALL_VALID))
 
       # Diagnose again, ignoring exit status
-      output = qa_utils.GetCommandOutput(master["primary"],
+      output = qa_utils.GetCommandOutput(master.primary,
                                          "gnt-os diagnose || :")
       for line in output.splitlines():
         if line.startswith("OS: %s [global status:" % name):
@@ -173,13 +173,13 @@ def _TestOs(mode, rapi_cb):
 
       # Check info for all
       cmd = ["gnt-os", "info"]
-      output = qa_utils.GetCommandOutput(master["primary"],
+      output = qa_utils.GetCommandOutput(master.primary,
                                          utils.ShellQuoteArgs(cmd))
       AssertIn("%s:" % name, output.splitlines())
 
       # Check info for OS
       cmd = ["gnt-os", "info", name]
-      output = qa_utils.GetCommandOutput(master["primary"],
+      output = qa_utils.GetCommandOutput(master.primary,
                                          utils.ShellQuoteArgs(cmd)).splitlines()
       AssertIn("%s:" % name, output)
       for (field, value) in [("valid", mode == _ALL_VALID),
@@ -189,7 +189,7 @@ def _TestOs(mode, rapi_cb):
 
       # Only valid OSes should be listed
       cmd = ["gnt-os", "list", "--no-headers"]
-      output = qa_utils.GetCommandOutput(master["primary"],
+      output = qa_utils.GetCommandOutput(master.primary,
                                          utils.ShellQuoteArgs(cmd))
       if mode == _ALL_VALID and not (hidden or blacklisted):
         assert_fn = AssertIn
index 4ffa916..887f3b7 100644 (file)
@@ -80,7 +80,7 @@ def Setup(username, password):
 
   # Write to temporary file
   _rapi_ca = tempfile.NamedTemporaryFile()
-  _rapi_ca.write(qa_utils.GetCommandOutput(master["primary"],
+  _rapi_ca.write(qa_utils.GetCommandOutput(master.primary,
                                            utils.ShellQuoteArgs(cmd)))
   _rapi_ca.flush()
 
@@ -88,7 +88,7 @@ def Setup(username, password):
   cfg_curl = rapi.client.GenericCurlConfig(cafile=_rapi_ca.name,
                                            proxy="")
 
-  _rapi_client = rapi.client.GanetiRapiClient(master["primary"], port=port,
+  _rapi_client = rapi.client.GanetiRapiClient(master.primary, port=port,
                                               username=username,
                                               password=password,
                                               curl_config_fn=cfg_curl)
@@ -377,19 +377,19 @@ def TestInstance(instance):
       _VerifyInstance(instance_data)
 
   _DoTests([
-    ("/2/instances/%s" % instance["name"], _VerifyInstance, "GET", None),
+    ("/2/instances/%s" % instance.name, _VerifyInstance, "GET", None),
     ("/2/instances", _VerifyInstancesList, "GET", None),
     ("/2/instances?bulk=1", _VerifyInstancesBulk, "GET", None),
-    ("/2/instances/%s/activate-disks" % instance["name"],
+    ("/2/instances/%s/activate-disks" % instance.name,
      _VerifyReturnsJob, "PUT", None),
-    ("/2/instances/%s/deactivate-disks" % instance["name"],
+    ("/2/instances/%s/deactivate-disks" % instance.name,
      _VerifyReturnsJob, "PUT", None),
     ])
 
   # Test OpBackupPrepare
   (job_id, ) = _DoTests([
     ("/2/instances/%s/prepare-export?mode=%s" %
-     (instance["name"], constants.EXPORT_MODE_REMOTE),
+     (instance.name, constants.EXPORT_MODE_REMOTE),
      _VerifyReturnsJob, "PUT", None),
     ])
 
@@ -418,7 +418,7 @@ def TestNode(node):
       _VerifyNode(node_data)
 
   _DoTests([
-    ("/2/nodes/%s" % node["primary"], _VerifyNode, "GET", None),
+    ("/2/nodes/%s" % node.primary, _VerifyNode, "GET", None),
     ("/2/nodes", _VerifyNodesList, "GET", None),
     ("/2/nodes?bulk=1", _VerifyNodesBulk, "GET", None),
     ])
@@ -554,12 +554,11 @@ def TestRapiNodeGroups():
 def TestRapiInstanceAdd(node, use_client):
   """Test adding a new instance via RAPI"""
   instance = qa_config.AcquireInstance()
-  qa_config.SetInstanceTemplate(instance, constants.DT_PLAIN)
+  instance.SetDiskTemplate(constants.DT_PLAIN)
   try:
     disk_sizes = [utils.ParseUnit(size) for size in qa_config.get("disk")]
     disks = [{"size": size} for size in disk_sizes]
-    nic0_mac = qa_config.GetInstanceNicMac(instance,
-                                           default=constants.VALUE_GENERATE)
+    nic0_mac = instance.GetNicMacAddr(0, constants.VALUE_GENERATE)
     nics = [{
       constants.INIC_MAC: nic0_mac,
       }]
@@ -571,20 +570,20 @@ def TestRapiInstanceAdd(node, use_client):
 
     if use_client:
       job_id = _rapi_client.CreateInstance(constants.INSTANCE_CREATE,
-                                           instance["name"],
+                                           instance.name,
                                            constants.DT_PLAIN,
                                            disks, nics,
                                            os=qa_config.get("os"),
-                                           pnode=node["primary"],
+                                           pnode=node.primary,
                                            beparams=beparams)
     else:
       body = {
         "__version__": 1,
         "mode": constants.INSTANCE_CREATE,
-        "name": instance["name"],
+        "name": instance.name,
         "os_type": qa_config.get("os"),
         "disk_template": constants.DT_PLAIN,
-        "pnode": node["primary"],
+        "pnode": node.primary,
         "beparams": beparams,
         "disks": disks,
         "nics": nics,
@@ -598,7 +597,7 @@ def TestRapiInstanceAdd(node, use_client):
 
     return instance
   except:
-    qa_config.ReleaseInstance(instance)
+    instance.Release()
     raise
 
 
@@ -606,16 +605,14 @@ def TestRapiInstanceAdd(node, use_client):
 def TestRapiInstanceRemove(instance, use_client):
   """Test removing instance via RAPI"""
   if use_client:
-    job_id = _rapi_client.DeleteInstance(instance["name"])
+    job_id = _rapi_client.DeleteInstance(instance.name)
   else:
     (job_id, ) = _DoTests([
-      ("/2/instances/%s" % instance["name"], _VerifyReturnsJob, "DELETE", None),
+      ("/2/instances/%s" % instance.name, _VerifyReturnsJob, "DELETE", None),
       ])
 
   _WaitForRapiJob(job_id)
 
-  qa_config.ReleaseInstance(instance)
-
 
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
 def TestRapiInstanceMigrate(instance):
@@ -625,10 +622,10 @@ def TestRapiInstanceMigrate(instance):
                               " test")
     return
   # Move to secondary node
-  _WaitForRapiJob(_rapi_client.MigrateInstance(instance["name"]))
+  _WaitForRapiJob(_rapi_client.MigrateInstance(instance.name))
   qa_utils.RunInstanceCheck(instance, True)
   # And back to previous primary
-  _WaitForRapiJob(_rapi_client.MigrateInstance(instance["name"]))
+  _WaitForRapiJob(_rapi_client.MigrateInstance(instance.name))
 
 
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
@@ -639,22 +636,22 @@ def TestRapiInstanceFailover(instance):
                               " test")
     return
   # Move to secondary node
-  _WaitForRapiJob(_rapi_client.FailoverInstance(instance["name"]))
+  _WaitForRapiJob(_rapi_client.FailoverInstance(instance.name))
   qa_utils.RunInstanceCheck(instance, True)
   # And back to previous primary
-  _WaitForRapiJob(_rapi_client.FailoverInstance(instance["name"]))
+  _WaitForRapiJob(_rapi_client.FailoverInstance(instance.name))
 
 
 @InstanceCheck(INST_UP, INST_DOWN, FIRST_ARG)
 def TestRapiInstanceShutdown(instance):
   """Test stopping an instance via RAPI"""
-  _WaitForRapiJob(_rapi_client.ShutdownInstance(instance["name"]))
+  _WaitForRapiJob(_rapi_client.ShutdownInstance(instance.name))
 
 
 @InstanceCheck(INST_DOWN, INST_UP, FIRST_ARG)
 def TestRapiInstanceStartup(instance):
   """Test starting an instance via RAPI"""
-  _WaitForRapiJob(_rapi_client.StartupInstance(instance["name"]))
+  _WaitForRapiJob(_rapi_client.StartupInstance(instance.name))
 
 
 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
@@ -675,12 +672,12 @@ def TestRapiInstanceRenameAndBack(rename_source, rename_target):
 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
 def TestRapiInstanceReinstall(instance):
   """Test reinstalling an instance via RAPI"""
-  _WaitForRapiJob(_rapi_client.ReinstallInstance(instance["name"]))
+  _WaitForRapiJob(_rapi_client.ReinstallInstance(instance.name))
   # By default, the instance is started again
   qa_utils.RunInstanceCheck(instance, True)
 
   # Reinstall again without starting
-  _WaitForRapiJob(_rapi_client.ReinstallInstance(instance["name"],
+  _WaitForRapiJob(_rapi_client.ReinstallInstance(instance.name,
                                                  no_startup=True))
 
 
@@ -692,9 +689,9 @@ def TestRapiInstanceReplaceDisks(instance):
                               " skipping test")
     return
   fn = _rapi_client.ReplaceInstanceDisks
-  _WaitForRapiJob(fn(instance["name"],
+  _WaitForRapiJob(fn(instance.name,
                      mode=constants.REPLACE_DISK_AUTO, disks=[]))
-  _WaitForRapiJob(fn(instance["name"],
+  _WaitForRapiJob(fn(instance.name,
                      mode=constants.REPLACE_DISK_SEC, disks="0"))
 
 
@@ -704,7 +701,7 @@ def TestRapiInstanceModify(instance):
   default_hv = qa_config.GetDefaultHypervisor()
 
   def _ModifyInstance(**kwargs):
-    _WaitForRapiJob(_rapi_client.ModifyInstance(instance["name"], **kwargs))
+    _WaitForRapiJob(_rapi_client.ModifyInstance(instance.name, **kwargs))
 
   _ModifyInstance(beparams={
     constants.BE_VCPUS: 3,
@@ -733,17 +730,17 @@ def TestRapiInstanceModify(instance):
 @InstanceCheck(INST_UP, INST_UP, FIRST_ARG)
 def TestRapiInstanceConsole(instance):
   """Test getting instance console information via RAPI"""
-  result = _rapi_client.GetInstanceConsole(instance["name"])
+  result = _rapi_client.GetInstanceConsole(instance.name)
   console = objects.InstanceConsole.FromDict(result)
   AssertEqual(console.Validate(), True)
-  AssertEqual(console.instance, qa_utils.ResolveInstanceName(instance["name"]))
+  AssertEqual(console.instance, qa_utils.ResolveInstanceName(instance.name))
 
 
 @InstanceCheck(INST_DOWN, INST_DOWN, FIRST_ARG)
 def TestRapiStoppedInstanceConsole(instance):
   """Test getting stopped instance's console information via RAPI"""
   try:
-    _rapi_client.GetInstanceConsole(instance["name"])
+    _rapi_client.GetInstanceConsole(instance.name)
   except rapi.client.GanetiApiError, err:
     AssertEqual(err.code, 503)
   else:
@@ -767,8 +764,7 @@ def TestInterClusterInstanceMove(src_instance, dest_instance,
   rapi_pw_file.write(_rapi_password)
   rapi_pw_file.flush()
 
-  qa_config.SetInstanceTemplate(dest_instance,
-                                qa_config.GetInstanceTemplate(src_instance))
+  dest_instance.SetDiskTemplate(src_instance.disk_template)
 
   # TODO: Run some instance tests before moving back
 
@@ -784,10 +780,10 @@ def TestInterClusterInstanceMove(src_instance, dest_instance,
   pnode = inodes[0]
   # note: pnode:snode are the *current* nodes, so we move it first to
   # tnode:pnode, then back to pnode:snode
-  for si, di, pn, sn in [(src_instance["name"], dest_instance["name"],
-                          tnode["primary"], pnode["primary"]),
-                         (dest_instance["name"], src_instance["name"],
-                          pnode["primary"], snode["primary"])]:
+  for si, di, pn, sn in [(src_instance.name, dest_instance.name,
+                          tnode.primary, pnode.primary),
+                         (dest_instance.name, src_instance.name,
+                          pnode.primary, snode.primary)]:
     cmd = [
       "../tools/move-instance",
       "--verbose",
@@ -798,8 +794,8 @@ def TestInterClusterInstanceMove(src_instance, dest_instance,
       "--dest-primary-node=%s" % pn,
       "--dest-secondary-node=%s" % sn,
       "--net=0:mac=%s" % constants.VALUE_GENERATE,
-      master["primary"],
-      master["primary"],
+      master.primary,
+      master.primary,
       si,
       ]
 
index 5aa316d..834f7a8 100644 (file)
@@ -72,7 +72,7 @@ def TestClusterTags():
 
 def TestNodeTags(node):
   """gnt-node tags"""
-  _TestTags(constants.TAG_NODE, node["primary"])
+  _TestTags(constants.TAG_NODE, node.primary)
 
 
 def TestGroupTags(group):
@@ -82,4 +82,4 @@ def TestGroupTags(group):
 
 def TestInstanceTags(instance):
   """gnt-instance tags"""
-  _TestTags(constants.TAG_INSTANCE, instance["name"])
+  _TestTags(constants.TAG_INSTANCE, instance.name)
index 774ccc9..9533c28 100644 (file)
@@ -29,6 +29,7 @@ import sys
 import subprocess
 import random
 import tempfile
+import operator
 
 try:
   import functools
@@ -129,21 +130,17 @@ def AssertMatch(string, pattern):
     raise qa_error.Error("%r doesn't match /%r/" % (string, pattern))
 
 
-def _GetName(entity, key):
+def _GetName(entity, fn):
   """Tries to get name of an entity.
 
-  @type entity: string or dict
+  @type entity: string or object
-  @type key: string
-  @param key: Dictionary key containing name
+  @param fn: Function retrieving name from entity
 
   """
   if isinstance(entity, basestring):
     result = entity
-  elif isinstance(entity, dict):
-    result = entity[key]
   else:
-    raise qa_error.Error("Expected string or dictionary, got %s: %s" %
-                         (type(entity), entity))
+    result = fn(entity)
 
   if not ht.TNonEmptyString(result):
     raise Exception("Invalid name '%s'" % result)
@@ -182,7 +179,7 @@ def AssertCommand(cmd, fail=False, node=None, log_cmd=True):
   if node is None:
     node = qa_config.GetMasterNode()
 
-  nodename = _GetName(node, "primary")
+  nodename = _GetName(node, operator.attrgetter("primary"))
 
   if isinstance(cmd, basestring):
     cmdstr = cmd
@@ -404,7 +401,7 @@ def _ResolveName(cmd, key):
   """
   master = qa_config.GetMasterNode()
 
-  output = GetCommandOutput(master["primary"], utils.ShellQuoteArgs(cmd))
+  output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd))
   for line in output.splitlines():
     (lkey, lvalue) = line.split(":", 1)
     if lkey == key:
@@ -427,7 +424,7 @@ def ResolveNodeName(node):
   """Gets the full name of a node.
 
   """
-  return _ResolveName(["gnt-node", "info", node["primary"]],
+  return _ResolveName(["gnt-node", "info", node.primary],
                       "Node name")
 
 
@@ -441,7 +438,7 @@ def GetNodeInstances(node, secondaries=False):
   # Get list of all instances
   cmd = ["gnt-instance", "list", "--separator=:", "--no-headers",
          "--output=name,pnode,snodes"]
-  output = GetCommandOutput(master["primary"], utils.ShellQuoteArgs(cmd))
+  output = GetCommandOutput(master.primary, utils.ShellQuoteArgs(cmd))
 
   instances = []
   for line in output.splitlines():
@@ -485,7 +482,7 @@ def _List(listcmd, fields, names):
   if names:
     cmd.extend(names)
 
-  return GetCommandOutput(master["primary"],
+  return GetCommandOutput(master.primary,
                           utils.ShellQuoteArgs(cmd)).splitlines()
 
 
@@ -541,7 +538,7 @@ def GenericQueryFieldsTest(cmd, fields):
 
   # Check listed fields (all, must be sorted)
   realcmd = [cmd, "list-fields", "--separator=|", "--no-headers"]
-  output = GetCommandOutput(master["primary"],
+  output = GetCommandOutput(master.primary,
                             utils.ShellQuoteArgs(realcmd)).splitlines()
   AssertEqual([line.split("|", 1)[0] for line in output],
               utils.NiceSort(fields))
@@ -570,7 +567,7 @@ def AddToEtcHosts(hostnames):
 
   """
   master = qa_config.GetMasterNode()
-  tmp_hosts = UploadData(master["primary"], "", mode=0644)
+  tmp_hosts = UploadData(master.primary, "", mode=0644)
 
   data = []
   for localhost in ("::1", "127.0.0.1"):
@@ -595,7 +592,7 @@ def RemoveFromEtcHosts(hostnames):
 
   """
   master = qa_config.GetMasterNode()
-  tmp_hosts = UploadData(master["primary"], "", mode=0644)
+  tmp_hosts = UploadData(master.primary, "", mode=0644)
   quoted_tmp_hosts = utils.ShellQuote(tmp_hosts)
 
   sed_data = " ".join(hostnames)
@@ -614,7 +611,7 @@ def RunInstanceCheck(instance, running):
   """Check if instance is running or not.
 
   """
-  instance_name = _GetName(instance, "name")
+  instance_name = _GetName(instance, operator.attrgetter("name"))
 
   script = qa_config.GetInstanceCheckScript()
   if not script:
@@ -623,7 +620,7 @@ def RunInstanceCheck(instance, running):
   master_node = qa_config.GetMasterNode()
 
   # Build command to connect to master node
-  master_ssh = GetSSHCommand(master_node["primary"], "--")
+  master_ssh = GetSSHCommand(master_node.primary, "--")
 
   if running:
     running_shellval = "1"
@@ -695,17 +692,40 @@ def GetNonexistentGroups(count):
   """Gets group names which shouldn't exist on the cluster.
 
   @param count: Number of groups to get
  @rtype: list
 
   """
-  groups = qa_config.get("groups", {})
+  return GetNonexistentEntityNames(count, "groups", "group")
 
-  default = ["group1", "group2", "group3"]
+
+def GetNonexistentEntityNames(count, name_config, name_prefix):
+  """Gets entity names which shouldn't exist on the cluster.
+
+  The actual names can refer to arbitrary entities (for example
+  groups, networks).
+
+  @type count: integer
+  @param count: Number of names to get
+  @type name_config: string
+  @param name_config: name of the leaf in the config containing
+    this entity's configuration, including an 'inexistent-'
+    element
+  @type name_prefix: string
+  @param name_prefix: prefix of the entity's names, used to compose
+    the default values; for example for groups, the prefix is
+    'group' and the generated names are then group1, group2, ...
+  @rtype: list
+
+  """
+  entities = qa_config.get(name_config, {})
+
+  default = [name_prefix + str(i) for i in range(1, count + 1)]
   assert count <= len(default)
 
-  candidates = groups.get("inexistent-groups", default)[:count]
+  name_config_inexistent = "inexistent-" + name_config
+  candidates = entities.get(name_config_inexistent, default)[:count]
 
   if len(candidates) < count:
-    raise Exception("At least %s non-existent groups are needed" % count)
+    raise Exception("At least %s non-existent %s are needed" %
+                    (count, name_config))
 
   return candidates
index 17750b4..80f9ad9 100644 (file)
@@ -123,7 +123,7 @@ argComplToText :: ArgCompletion -> String
 argComplToText (ArgCompletion optc min_cnt max_cnt) =
   complToText optc ++ " " ++ show min_cnt ++ " " ++ maybe "none" show max_cnt
 
--- | Abrreviation for the option type.
+-- | Abbreviation for the option type.
 type GenericOptType a = (OptDescr (a -> Result a), OptCompletion)
 
 -- | Type class for options which support help and version.
index 2561cb0..18dce62 100644 (file)
@@ -44,6 +44,7 @@ module Ganeti.Config
     , getGroupOfNode
     , getInstPrimaryNode
     , getInstMinorsForNode
+    , getNetwork
     , buildLinkIpInstnameMap
     , instNodes
     ) where
@@ -210,6 +211,18 @@ getGroupInstances cfg gname =
       ginsts = map (getNodeInstances cfg) gnodes in
   (concatMap fst ginsts, concatMap snd ginsts)
 
+-- | Looks up a network. If looking up by uuid fails, we look up
+-- by name.
+getNetwork :: ConfigData -> String -> ErrorResult Network
+getNetwork cfg name =
+  let networks = fromContainer (configNetworks cfg)
+  in case getItem "Network" name networks of
+       Ok net -> Ok net
+       Bad _ -> let by_name = M.mapKeys
+                              (fromNonEmpty . networkName . (M.!) networks)
+                              networks
+                in getItem "Network" name by_name
+
 -- | Looks up an instance's primary node.
 getInstPrimaryNode :: ConfigData -> String -> ErrorResult Node
 getInstPrimaryNode cfg name =
index b089944..5e9bf08 100644 (file)
@@ -55,6 +55,7 @@ module Ganeti.HTools.CLI
   , oGroup
   , oIAllocSrc
   , oInstMoves
+  , oJobDelay
   , genOLuxiSocket
   , oLuxiSocket
   , oMachineReadable
@@ -120,6 +121,7 @@ data Options = Options
   , optIAllocSrc   :: Maybe FilePath -- ^ The iallocation spec
   , optSelInst     :: [String]       -- ^ Instances to be excluded
   , optLuxi        :: Maybe FilePath -- ^ Collect data from Luxi
+  , optJobDelay    :: Double         -- ^ Delay before executing first job
   , optMachineReadable :: Bool       -- ^ Output machine-readable format
   , optMaster      :: String         -- ^ Collect data from RAPI
   , optMaxLength   :: Int            -- ^ Stop after this many steps
@@ -165,6 +167,7 @@ defaultOptions  = Options
   , optIAllocSrc   = Nothing
   , optSelInst     = []
   , optLuxi        = Nothing
+  , optJobDelay    = 10
   , optMachineReadable = False
   , optMaster      = ""
   , optMaxLength   = -1
@@ -330,6 +333,15 @@ oIAllocSrc =
    "Specify an iallocator spec as the cluster data source",
    OptComplFile)
 
+oJobDelay :: OptType
+oJobDelay =
+  (Option "" ["job-delay"]
+   (reqWithConversion (tryRead "job delay")
+    (\d opts -> Ok opts { optJobDelay = d }) "SECONDS")
+   "insert this much delay before the execution of repair jobs\
+   \ to allow the tool to continue processing instances",
+   OptComplFloat)
+
 genOLuxiSocket :: String -> OptType
 genOLuxiSocket defSocket =
   (Option "L" ["luxi"]
diff --git a/src/Ganeti/HTools/Program/Harep.hs b/src/Ganeti/HTools/Program/Harep.hs
new file mode 100644 (file)
index 0000000..520deeb
--- /dev/null
@@ -0,0 +1,478 @@
+{-# LANGUAGE TupleSections #-}
+
+{-| Auto-repair tool for Ganeti.
+
+-}
+
+{-
+
+Copyright (C) 2013 Google Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+
+-}
+
+module Ganeti.HTools.Program.Harep
+  ( main
+  , arguments
+  , options) where
+
+import Control.Exception (bracket)
+import Control.Monad
+import Data.Function
+import Data.List
+import Data.Maybe
+import Data.Ord
+import System.Time
+import qualified Data.Map as Map
+
+import Ganeti.BasicTypes
+import Ganeti.Common
+import Ganeti.Errors
+import Ganeti.Jobs
+import Ganeti.OpCodes
+import Ganeti.OpParams
+import Ganeti.Types
+import Ganeti.Utils
+import qualified Ganeti.Constants as C
+import qualified Ganeti.Luxi as L
+import qualified Ganeti.Path as Path
+
+import Ganeti.HTools.CLI
+import Ganeti.HTools.Loader
+import Ganeti.HTools.ExtLoader
+import Ganeti.HTools.Types
+import qualified Ganeti.HTools.Container as Container
+import qualified Ganeti.HTools.Instance as Instance
+import qualified Ganeti.HTools.Node as Node
+
+-- | Options list and functions.
+options :: IO [OptType]
+options = do
+  luxi <- oLuxiSocket
+  return
+    [ luxi
+    , oJobDelay
+    ]
+
+arguments :: [ArgCompletion]
+arguments = []
+
+data InstanceData = InstanceData { arInstance :: Instance.Instance
+                                 , arState :: AutoRepairStatus
+                                 , tagsToRemove :: [String]
+                                 }
+                    deriving (Eq, Show)
+
+-- | Parse a tag into an 'AutoRepairData' record.
+--
+-- @Nothing@ is returned if the tag is not an auto-repair tag, or if it's
+-- malformed.
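+--
+-- For reference, the two accepted shapes (mirroring 'updateTag') are
+-- roughly:
+--
+-- > <pending-prefix><type>:<uuid>:<timestamp>:<jobid>[+<jobid>...]
+-- > <result-prefix><type>:<uuid>:<timestamp>:<result>:<jobid>[+<jobid>...]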
+parseInitTag :: String -> Maybe AutoRepairData
+parseInitTag tag =
+  let parsePending = do
+        subtag <- chompPrefix C.autoRepairTagPending tag
+        case sepSplit ':' subtag of
+          [rtype, uuid, ts, jobs] -> makeArData rtype uuid ts jobs
+          _                       -> fail ("Invalid tag: " ++ show tag)
+
+      parseResult = do
+        subtag <- chompPrefix C.autoRepairTagResult tag
+        case sepSplit ':' subtag of
+          [rtype, uuid, ts, result, jobs] -> do
+            arData <- makeArData rtype uuid ts jobs
+            result' <- autoRepairResultFromRaw result
+            return arData { arResult = Just result' }
+          _                               -> fail ("Invalid tag: " ++ show tag)
+
+      makeArData rtype uuid ts jobs = do
+        rtype' <- autoRepairTypeFromRaw rtype
+        ts' <- tryRead "auto-repair time" ts
+        jobs' <- mapM makeJobIdS $ sepSplit '+' jobs
+        return AutoRepairData { arType = rtype'
+                              , arUuid = uuid
+                              , arTime = TOD ts' 0
+                              , arJobs = jobs'
+                              , arResult = Nothing
+                              , arTag = tag
+                              }
+  in
+   parsePending `mplus` parseResult
+
+-- | Return the 'AutoRepairData' element of an 'AutoRepairStatus' type.
+getArData :: AutoRepairStatus -> Maybe AutoRepairData
+getArData status =
+  case status of
+    ArHealthy (Just d) -> Just d
+    ArFailedRepair  d  -> Just d
+    ArPendingRepair d  -> Just d
+    ArNeedsRepair   d  -> Just d
+    _                  -> Nothing
+
+-- | Return a short name for each auto-repair status.
+--
+-- This is a more concise representation of the status, because the default
+-- "Show" formatting includes all the accompanying auto-repair data.
+arStateName :: AutoRepairStatus -> String
+arStateName status =
+  case status of
+    ArHealthy _       -> "Healthy"
+    ArFailedRepair _  -> "Failure"
+    ArPendingRepair _ -> "Pending repair"
+    ArNeedsRepair _   -> "Needs repair"
+
+-- | Return a new list of tags to remove that includes @arTag@ if present.
+delCurTag :: InstanceData -> [String]
+delCurTag instData =
+  let arData = getArData $ arState instData
+      rmTags = tagsToRemove instData
+  in
+   case arData of
+     Just d  -> arTag d : rmTags
+     Nothing -> rmTags
+
+-- | Set the initial auto-repair state of an instance from its auto-repair tags.
+--
+-- The rules when there are multiple tags are:
+--
+--   * the earliest failure result always wins
+--
+--   * two or more pending repairs result in a fatal error
+--
+--   * a pending result from id X and a success result from id Y result in error
+--     if Y is newer than X
+--
+--   * if there are no pending repairs, the newest success result wins,
+--     otherwise the pending result is used.
+setInitialState :: Instance.Instance -> Result InstanceData
+setInitialState inst =
+  let arData = mapMaybe parseInitTag $ Instance.allTags inst
+      -- Group all the AutoRepairData records by id (i.e. by repair task), and
+      -- present them from oldest to newest.
+      arData' = sortBy (comparing arUuid) arData
+      arGroups = groupBy ((==) `on` arUuid) arData'
+      arGroups' = sortBy (comparing $ minimum . map arTime) arGroups
+  in
+   foldM arStatusCmp (InstanceData inst (ArHealthy Nothing) []) arGroups'
+
+-- | Update the initial status of an instance with new repair task tags.
+--
+-- This function gets called once per repair group in an instance's tags, and it
+-- determines whether to set the status of the instance according to this new
+-- group, or to keep the existing state. See the documentation for
+-- 'setInitialState' for the rules to be followed when determining this.
+arStatusCmp :: InstanceData -> [AutoRepairData] -> Result InstanceData
+arStatusCmp instData arData =
+  let curSt = arState instData
+      arData' = sortBy (comparing keyfn) arData
+      keyfn d = (arResult d, arTime d)
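+      -- Note that 'Nothing' sorts before any 'Just' value, so within this
+      -- group (a single repair id) a tag carrying a result always
+      -- supersedes the bare pending tag, regardless of timestamps.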
+      newData = last arData'
+      newSt = case arResult newData of
+                Just ArSuccess -> ArHealthy $ Just newData
+                Just ArEnoperm -> ArHealthy $ Just newData
+                Just ArFailure -> ArFailedRepair newData
+                Nothing        -> ArPendingRepair newData
+  in
+   case curSt of
+     ArFailedRepair _ -> Ok instData  -- Always keep the earliest failure.
+     ArHealthy _      -> Ok instData { arState = newSt
+                                     , tagsToRemove = delCurTag instData
+                                     }
+     ArPendingRepair d -> Bad (
+       "An unfinished repair was found in instance " ++
+       Instance.name (arInstance instData) ++ ": found tag " ++
+       show (arTag newData) ++ ", but older pending tag " ++
+       show (arTag d) ++ "exists.")
+
+     ArNeedsRepair _ -> Bad
+       "programming error: ArNeedsRepair found as an initial state"
+
+-- | Query jobs of a pending repair, returning the new instance data.
+processPending :: L.Client -> InstanceData -> IO InstanceData
+processPending client instData =
+  case arState instData of
+    (ArPendingRepair arData) -> do
+      sts <- L.queryJobsStatus client $ arJobs arData
+      time <- getClockTime
+      case sts of
+        Bad e -> exitErr $ "could not check job status: " ++ formatError e
+        Ok sts' ->
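+          -- Job states are ordered by lifecycle, so anything up to (and
+          -- including) JOB_STATUS_RUNNING is still in progress.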
+          if any (<= JOB_STATUS_RUNNING) sts' then
+            return instData -- (no change)
+          else do
+            let iname = Instance.name $ arInstance instData
+                srcSt = arStateName $ arState instData
+                destSt = arStateName arState'
+            putStrLn ("Moving " ++ iname ++ " from " ++ show srcSt ++ " to " ++
+                      show destSt)
+            commitChange client instData'
+          where
+            instData' =
+              instData { arState = arState'
+                       , tagsToRemove = delCurTag instData
+                       }
+            arState' =
+              if all (== JOB_STATUS_SUCCESS) sts' then
+                ArHealthy $ Just (updateTag $ arData { arResult = Just ArSuccess
+                                                     , arTime = time })
+              else
+                ArFailedRepair (updateTag $ arData { arResult = Just ArFailure
+                                                   , arTime = time })
+
+    _ -> return instData
+
+-- | Update the tag of an 'AutoRepairData' record to match all the other fields.
+updateTag :: AutoRepairData -> AutoRepairData
+updateTag arData =
+  let ini = [autoRepairTypeToRaw $ arType arData,
+             arUuid arData,
+             clockTimeToString $ arTime arData]
+      end = [intercalate "+" . map (show . fromJobId) $ arJobs arData]
+      (pfx, middle) =
+         case arResult arData of
+          Nothing -> (C.autoRepairTagPending, [])
+          Just rs -> (C.autoRepairTagResult, [autoRepairResultToRaw rs])
+  in
+   arData { arTag = pfx ++ intercalate ":" (ini ++ middle ++ end) }
+
+-- | Apply and remove tags from an instance as indicated by 'InstanceData'.
+--
+-- If the /arState/ of the /InstanceData/ record has an associated
+-- 'AutoRepairData', add its tag to the instance object. Additionally, if
+-- /tagsToRemove/ is not empty, remove those tags from the instance object. The
+-- returned /InstanceData/ object always has an empty /tagsToRemove/.
+commitChange :: L.Client -> InstanceData -> IO InstanceData
+commitChange client instData = do
+  let iname = Instance.name $ arInstance instData
+      arData = getArData $ arState instData
+      rmTags = tagsToRemove instData
+      execJobsWaitOk' opcodes = do
+        res <- execJobsWaitOk [map wrapOpCode opcodes] client
+        case res of
+          Ok _ -> return ()
+          Bad e -> exitErr e
+
+  when (isJust arData) $ do
+    let tag = arTag $ fromJust arData
+    putStrLn (">>> Adding the following tag to " ++ iname ++ ":\n" ++ show tag)
+    execJobsWaitOk' [OpTagsSet (TagInstance iname) [tag]]
+
+  unless (null rmTags) $ do
+    putStr (">>> Removing the following tags from " ++ iname ++ ":\n" ++
+            unlines (map show rmTags))
+    execJobsWaitOk' [OpTagsDel (TagInstance iname) rmTags]
+
+  return instData { tagsToRemove = [] }
+
+-- | Detect brokenness with an instance and suggest repair type and jobs to run.
+detectBroken :: Node.List -> Instance.Instance
+             -> Maybe (AutoRepairType, [OpCode])
+detectBroken nl inst =
+  let disk = Instance.diskTemplate inst
+      iname = Instance.name inst
+      offPri = Node.offline $ Container.find (Instance.pNode inst) nl
+      offSec = Node.offline $ Container.find (Instance.sNode inst) nl
+  in
+   case disk of
+     DTDrbd8
+       | offPri && offSec ->
+         Just (
+           ArReinstall,
+           [ OpInstanceRecreateDisks { opInstanceName = iname
+                                     , opRecreateDisksInfo = RecreateDisksAll
+                                     , opNodes = []
+                                       -- FIXME: there should be a better way to
+                                       -- specify opcode parameters than abusing
+                                       -- mkNonEmpty in this way (using the fact
+                                       -- that Maybe is used both for optional
+                                       -- fields, and to express failure).
+                                     , opIallocator = mkNonEmpty "hail"
+                                     }
+           , OpInstanceReinstall { opInstanceName = iname
+                                 , opOsType = Nothing
+                                 , opTempOsParams = Nothing
+                                 , opForceVariant = False
+                                 }
+           ])
+       | offPri ->
+         Just (
+           ArFailover,
+           [ OpInstanceFailover { opInstanceName = iname
+                                  -- FIXME: ditto, see above.
+                                , opShutdownTimeout = fromJust $ mkNonNegative
+                                                      C.defaultShutdownTimeout
+                                , opIgnoreConsistency = False
+                                , opTargetNode = Nothing
+                                , opIgnoreIpolicy = False
+                                , opIallocator = Nothing
+                                }
+           ])
+       | offSec ->
+         Just (
+           ArFixStorage,
+           [ OpInstanceReplaceDisks { opInstanceName = iname
+                                    , opReplaceDisksMode = ReplaceNewSecondary
+                                    , opReplaceDisksList = []
+                                    , opRemoteNode = Nothing
+                                      -- FIXME: ditto, see above.
+                                    , opIallocator = mkNonEmpty "hail"
+                                    , opEarlyRelease = False
+                                    , opIgnoreIpolicy = False
+                                    }
+            ])
+       | otherwise -> Nothing
+
+     DTPlain
+       | offPri ->
+         Just (
+           ArReinstall,
+           [ OpInstanceRecreateDisks { opInstanceName = iname
+                                     , opRecreateDisksInfo = RecreateDisksAll
+                                     , opNodes = []
+                                       -- FIXME: ditto, see above.
+                                     , opIallocator = mkNonEmpty "hail"
+                                     }
+           , OpInstanceReinstall { opInstanceName = iname
+                                 , opOsType = Nothing
+                                 , opTempOsParams = Nothing
+                                 , opForceVariant = False
+                                 }
+           ])
+       | otherwise -> Nothing
+
+     _ -> Nothing  -- Other cases are unimplemented for now: DTDiskless,
+                   -- DTFile, DTSharedFile, DTBlock, DTRbd, DTExt.
+
+-- | Perform the suggested repair on an instance if its policy allows it.
+doRepair :: L.Client     -- ^ The Luxi client
+         -> Double       -- ^ Delay to insert before the first repair opcode
+         -> InstanceData -- ^ The instance data
+         -> (AutoRepairType, [OpCode]) -- ^ The repair job to perform
+         -> IO InstanceData -- ^ The updated instance data
+doRepair client delay instData (rtype, opcodes) =
+  let inst = arInstance instData
+      ipol = Instance.arPolicy inst
+      iname = Instance.name inst
+  in
+  case ipol of
+    ArEnabled maxtype ->
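+      -- 'AutoRepairType' is ordered by severity, so a repair type greater
+      -- than 'maxtype' is more intrusive than the policy permits.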
+      if rtype > maxtype then do
+        uuid <- newUUID
+        time <- getClockTime
+
+        let arState' = ArNeedsRepair (
+              updateTag $ AutoRepairData rtype uuid time [] (Just ArEnoperm) "")
+            instData' = instData { arState = arState'
+                                 , tagsToRemove = delCurTag instData
+                                 }
+
+        putStrLn ("Not performing a repair of type " ++ show rtype ++ " on " ++
+          iname ++ " because only repairs up to " ++ show maxtype ++
+          " are allowed")
+        commitChange client instData'  -- Adds "enoperm" result label.
+      else do
+        putStrLn ("Executing " ++ show rtype ++ " repair on " ++ iname)
+
+        -- After submitting the job, we must write an autorepair:pending tag,
+        -- that includes the repair job IDs so that they can be checked later.
+        -- One problem we run into is that the repair job immediately grabs
+        -- locks for the affected instance, and the subsequent TAGS_SET job is
+        -- blocked, introducing an unnecessary delay for the end-user. One
+        -- alternative would be not to wait for the completion of the TAGS_SET
+        -- job, contrary to what commitChange normally does; but we insist on
+        -- waiting for the tag to be set so as to abort in case of failure,
+        -- because the cluster is left in an invalid state in that case.
+        --
+        -- The proper solution (in 2.9+) would be not to use tags for storing
+        -- autorepair data, or make the TAGS_SET opcode not grab an instance's
+        -- locks (if that's deemed safe). In the meantime, we introduce an
+        -- artificial delay in the repair job (via a TestDelay opcode) so that
+        -- once we have the job ID, the TAGS_SET job can complete before the
+        -- repair job actually grabs the locks. (Please note that this is not
+        -- about synchronization, but merely about speeding up the execution of
+        -- the harep tool. If this TestDelay opcode is removed, the program is
+        -- still correct.)
+        let opcodes' =
+              if delay > 0 then
+                OpTestDelay { opDelayDuration = delay
+                            , opDelayOnMaster = True
+                            , opDelayOnNodes = []
+                            , opDelayRepeat = fromJust $ mkNonNegative 0
+                            } : opcodes
+              else
+                opcodes
+
+        uuid <- newUUID
+        time <- getClockTime
+        jids <- submitJobs [map wrapOpCode opcodes'] client
+
+        case jids of
+          Bad e    -> exitErr e
+          Ok jids' ->
+            let arState' = ArPendingRepair (
+                  updateTag $ AutoRepairData rtype uuid time jids' Nothing "")
+                instData' = instData { arState = arState'
+                                     , tagsToRemove = delCurTag instData
+                                     }
+            in
+             commitChange client instData'  -- Adds "pending" label.
+
+    otherSt -> do
+      putStrLn ("Not repairing " ++ iname ++ " because it's in state " ++
+                show otherSt)
+      return instData
+
+-- | Main function.
+main :: Options -> [String] -> IO ()
+main opts args = do
+  unless (null args) $
+    exitErr "this program doesn't take any arguments."
+
+  luxiDef <- Path.defaultLuxiSocket
+  let master = fromMaybe luxiDef $ optLuxi opts
+      opts' = opts { optLuxi = Just master }
+
+  (ClusterData _ nl il _ _) <- loadExternalData opts'
+
+  let iniDataRes = mapM setInitialState $ Container.elems il
+  iniData <- exitIfBad "when parsing auto-repair tags" iniDataRes
+
+  -- First step: check all pending repairs, see if they are completed.
+  iniData' <- bracket (L.getClient master) L.closeClient $
+              forM iniData . processPending
+
+  -- Second step: detect any problems.
+  let repairs = map (detectBroken nl . arInstance) iniData'
+
+  -- Third step: create repair jobs for broken instances that are in ArHealthy.
+  let maybeRepair c (i, r) = maybe (return i) (repairHealthy c i) r
+      jobDelay = optJobDelay opts
+      repairHealthy c i = case arState i of
+                            ArHealthy _ -> doRepair c jobDelay i
+                            _           -> const (return i)
+
+  repairDone <- bracket (L.getClient master) L.closeClient $
+                forM (zip iniData' repairs) . maybeRepair
+
+  -- Print some stats and exit.
+  let states = map ((, 1 :: Int) . arStateName . arState) repairDone
+      counts = Map.fromListWith (+) states
+
+  putStrLn "---------------------"
+  putStrLn "Instance status count"
+  putStrLn "---------------------"
+  putStr . unlines . Map.elems $
+    Map.mapWithKey (\k v -> k ++ ": " ++ show v) counts
index c36e4cf..499c6e4 100644 (file)
@@ -38,6 +38,7 @@ import System.IO.Error (isDoesNotExistError)
 import Ganeti.Common (formatCommands, PersonalityList)
 import Ganeti.HTools.CLI (Options, parseOpts, genericOpts)
 import qualified Ganeti.HTools.Program.Hail as Hail
+import qualified Ganeti.HTools.Program.Harep as Harep
 import qualified Ganeti.HTools.Program.Hbal as Hbal
 import qualified Ganeti.HTools.Program.Hcheck as Hcheck
 import qualified Ganeti.HTools.Program.Hscan as Hscan
@@ -53,6 +54,9 @@ personalities =
                  "Ganeti IAllocator plugin that implements the instance\
                  \ placement and movement using the same algorithm as\
                  \ hbal(1)"))
+  , ("harep",   (Harep.main,   Harep.options,   Harep.arguments,
+                 "auto-repair tool that detects certain kind of problems\
+                 \ with instances and applies the allowed set of solutions"))
   , ("hbal",    (Hbal.main,    Hbal.options,    Hbal.arguments,
                  "cluster balancer that looks at the current state of\
                  \ the cluster and computes a series of steps designed\
index 735ae5e..c394c6c 100644 (file)
@@ -370,9 +370,11 @@ $(THH.declareSADT "AutoRepairType"
 
 -- | The possible auto-repair results.
 $(THH.declareSADT "AutoRepairResult"
-       [ ("ArSuccess", 'C.autoRepairSuccess)
+       -- Order is important here: higher results take precedence when an object
+       -- has several result annotations attached.
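+       -- For example, an object annotated with both 'ArSuccess' and
+       -- 'ArFailure' is reported as 'ArFailure'.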
+       [ ("ArEnoperm", 'C.autoRepairEnoperm)
+       , ("ArSuccess", 'C.autoRepairSuccess)
        , ("ArFailure", 'C.autoRepairFailure)
-       , ("ArEnoperm", 'C.autoRepairEnoperm)
        ])
 
 -- | The possible auto-repair policy for a given instance.
@@ -389,10 +391,11 @@ data AutoRepairSuspendTime = Forever         -- ^ Permanently suspended
 
 -- | The possible auto-repair states for any given instance.
 data AutoRepairStatus
-  = ArHealthy                      -- ^ No problems detected with the instance
+  = ArHealthy (Maybe AutoRepairData) -- ^ No problems detected with the instance
   | ArNeedsRepair AutoRepairData   -- ^ Instance has problems, no action taken
   | ArPendingRepair AutoRepairData -- ^ Repair jobs ongoing for the instance
   | ArFailedRepair AutoRepairData  -- ^ Some repair jobs for the instance failed
+  deriving (Eq, Show)
 
 -- | The data accompanying a repair operation (future, pending, or failed).
 data AutoRepairData = AutoRepairData { arType :: AutoRepairType
@@ -400,4 +403,6 @@ data AutoRepairData = AutoRepairData { arType :: AutoRepairType
                                      , arTime :: ClockTime
                                      , arJobs :: [JobId]
                                      , arResult :: Maybe AutoRepairResult
+                                     , arTag :: String
                                      }
+                    deriving (Eq, Show)
index 7990a77..179a3a0 100644 (file)
@@ -24,11 +24,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 -}
 
 module Ganeti.Jobs
-  ( execJobsWait
+  ( submitJobs
+  , execJobsWait
+  , execJobsWaitOk
   , waitForJobs
   ) where
 
 import Control.Concurrent (threadDelay)
+import Data.List
 
 import Ganeti.BasicTypes
 import Ganeti.Errors
@@ -36,6 +39,15 @@ import qualified Ganeti.Luxi as L
 import Ganeti.OpCodes
 import Ganeti.Types
 
+-- | Submits a set of jobs and returns their job IDs without waiting for
+-- completion.
+submitJobs :: [[MetaOpCode]] -> L.Client -> IO (Result [L.JobId])
+submitJobs opcodes client = do
+  jids <- L.submitManyJobs client opcodes
+  return (case jids of
+            Bad e    -> Bad $ "Job submission error: " ++ formatError e
+            Ok jids' -> Ok jids')
+
 -- | Executes a set of jobs and waits for their completion, returning their
 -- status.
 execJobsWait :: [[MetaOpCode]]        -- ^ The list of jobs
@@ -43,23 +55,41 @@ execJobsWait :: [[MetaOpCode]]        -- ^ The list of jobs
              -> L.Client              -- ^ The Luxi client
              -> IO (Result [(L.JobId, JobStatus)])
 execJobsWait opcodes callback client = do
-  jids <- L.submitManyJobs client opcodes
+  jids <- submitJobs opcodes client
   case jids of
-    Bad e -> return . Bad $ "Job submission error: " ++ formatError e
+    Bad e -> return $ Bad e
     Ok jids' -> do
       callback jids'
       waitForJobs jids' client
 
--- | Polls a set of jobs at a fixed interval until all are finished
--- one way or another.
+-- | Polls a set of jobs at an increasing interval until all are finished one
+-- way or another.
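+-- With the initial 0.5s delay doubling up to the 15s cap, the resulting
+-- polling schedule is 0.5s, 1s, 2s, 4s, 8s and then a flat 15s between
+-- status queries.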
 waitForJobs :: [L.JobId] -> L.Client -> IO (Result [(L.JobId, JobStatus)])
-waitForJobs jids client = do
-  sts <- L.queryJobsStatus client jids
+waitForJobs jids client = waitForJobs' 500000 15000000
+  where
+    waitForJobs' delay maxdelay = do
+      -- TODO: this should use WaitForJobChange once it's available in Haskell
+      -- land, instead of a fixed schedule of sleeping intervals.
+      threadDelay $ min delay maxdelay
+      sts <- L.queryJobsStatus client jids
+      case sts of
+        Bad e -> return . Bad $ "Checking job status: " ++ formatError e
+        Ok sts' -> if any (<= JOB_STATUS_RUNNING) sts' then
+                     waitForJobs' (delay * 2) maxdelay
+                   else
+                     return . Ok $ zip jids sts'
+
+-- | Execute jobs and return @Ok@ only if all of them succeeded.
+execJobsWaitOk :: [[MetaOpCode]] -> L.Client -> IO (Result ())
+execJobsWaitOk opcodes client = do
+  let nullog = const (return () :: IO ())
+      failed = filter ((/=) JOB_STATUS_SUCCESS . snd)
+      fmtfail (i, s) = show (fromJobId i) ++ "=>" ++ jobStatusToRaw s
+  sts <- execJobsWait opcodes nullog client
   case sts of
-    Bad e -> return . Bad $ "Checking job status: " ++ formatError e
-    Ok sts' -> if any (<= JOB_STATUS_RUNNING) sts'
-            then do
-              -- TODO: replace hardcoded value with a better thing
-              threadDelay (1000000 * 15)
-              waitForJobs jids client
-            else return . Ok $ zip jids sts'
+    Bad e -> return $ Bad e
+    Ok sts' -> return (if null $ failed sts' then
+                         Ok ()
+                       else
+                         Bad ("The following jobs failed: " ++
+                              (intercalate ", " . map fmtfail $ failed sts')))
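+
+-- A minimal caller sketch (illustrative only, not part of this module):
+-- run a single job built from @ops@ and turn any failed job into a
+-- runtime error.
+--
+-- > runOrFail :: [MetaOpCode] -> L.Client -> IO ()
+-- > runOrFail ops client = do
+-- >   res <- execJobsWaitOk [ops] client
+-- >   case res of
+-- >     Bad msg -> error msg
+-- >     Ok ()   -> return ()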
index 51d166a..6e568e6 100644 (file)
@@ -186,6 +186,7 @@ $(buildObject "Network" "network" $
   , optionalField $
     simpleField "ext_reservations" [t| String |]
   ]
+  ++ uuidFields
   ++ serialFields
   ++ tagsFields)
 
@@ -195,6 +196,9 @@ instance SerialNoObject Network where
 instance TagsObject Network where
   tagsOf = networkTags
 
+instance UuidObject Network where
+  uuidOf = networkUuid
+
 -- * NIC definitions
 
 $(buildParam "Nic" "nicp"
@@ -633,6 +637,7 @@ $(buildObject "ConfigData" "config" $
   , simpleField "nodes"      [t| Container Node      |]
   , simpleField "nodegroups" [t| Container NodeGroup |]
   , simpleField "instances"  [t| Container Instance  |]
+  , simpleField "networks"   [t| Container Network   |]
   ]
   ++ serialFields)
 
index 67d1547..55db65e 100644 (file)
@@ -27,7 +27,8 @@ module Ganeti.Query.Common
   ( rsNoData
   , rsUnavail
   , rsNormal
-  , rsMaybe
+  , rsMaybeNoData
+  , rsMaybeUnavail
   , rsUnknown
   , missingRuntime
   , rpcErrorToStatus
@@ -79,9 +80,17 @@ rsNormal a = ResultEntry RSNormal $ Just (showJSON a)
 -- missing, in which case we return no data). Note that there's some
 -- ambiguity here: in some cases, we mean 'RSNoData', but in other
 -- 'RSUnavail'; this is easy to solve in simple cases, but not in
--- nested dicts.
-rsMaybe :: (JSON a) => Maybe a -> ResultEntry
-rsMaybe = maybe rsNoData rsNormal
+-- nested dicts. If you want to return 'RSUnavail' in case of 'Nothing',
+-- use the function 'rsMaybeUnavail'.
+rsMaybeNoData :: (JSON a) => Maybe a -> ResultEntry
+rsMaybeNoData = maybe rsNoData rsNormal
+
+-- | Helper to declare a result from a 'Maybe'. This version returns
+-- a 'RSUnavail' in case of 'Nothing'. It should be used for optional
+-- fields that are not set. For cases where 'Nothing' means that there
+-- was an error, consider using 'rsMaybeNoData' instead.
+rsMaybeUnavail :: (JSON a) => Maybe a -> ResultEntry
+rsMaybeUnavail = maybe rsUnavail rsNormal
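+
+-- For instance, a node's @group@ field goes through 'rsMaybeNoData' (every
+-- node has a group, so 'Nothing' indicates broken configuration data),
+-- while a network's optional @gateway@ goes through 'rsMaybeUnavail'
+-- ('Nothing' simply means the field was never set).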
 
 -- | Helper for unknown field result.
 rsUnknown :: ResultEntry
@@ -135,7 +144,7 @@ tagsFields =
 -- levels of maybe: the top level dict might be missing, or one key in
 -- the dictionary might be.
 dictFieldGetter :: (DictObject a) => String -> Maybe a -> ResultEntry
-dictFieldGetter k = maybe rsNoData (rsMaybe . lookup k . toDict)
+dictFieldGetter k = maybe rsNoData (rsMaybeNoData . lookup k . toDict)
 
 -- | Build an optimised lookup map from a Python _PARAMETER_TYPES
 -- association list.
index ae88dfd..7cc52db 100644 (file)
@@ -113,6 +113,7 @@ $(declareSADT "QueryTypeOp"
   , ("QRGroup",    'C.qrGroup )
   , ("QROs",       'C.qrOs )
   , ("QRExport",   'C.qrExport )
+  , ("QRNetwork",  'C.qrNetwork )
   ])
 $(makeJSONInstance ''QueryTypeOp)
 
diff --git a/src/Ganeti/Query/Network.hs b/src/Ganeti/Query/Network.hs
new file mode 100644 (file)
index 0000000..2d262bf
--- /dev/null
@@ -0,0 +1,141 @@
+{-| Implementation of the Ganeti Query2 network queries.
+
+ -}
+
+{-
+
+Copyright (C) 2012 Google Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+
+-}
+
+module Ganeti.Query.Network
+  ( NetworkRuntime(..)
+  , networkFieldsMap
+  ) where
+
+import qualified Data.Map as Map
+import Data.Maybe (fromMaybe, mapMaybe)
+import Data.List (find)
+
+import Ganeti.JSON
+import Ganeti.Network
+import Ganeti.Objects
+import Ganeti.Query.Language
+import Ganeti.Query.Common
+import Ganeti.Query.Types
+import Ganeti.Types
+
+data NetworkRuntime = NetworkRuntime
+
+networkFields :: FieldList Network NetworkRuntime
+networkFields =
+  [ (FieldDefinition "name" "Name" QFTText "Network name",
+     FieldSimple (rsNormal . networkName), QffNormal)
+  , (FieldDefinition "network" "Subnet" QFTText "IPv4 subnet",
+     FieldSimple (rsNormal . networkNetwork), QffNormal)
+  , (FieldDefinition "gateway" "Gateway" QFTOther "IPv4 gateway",
+     FieldSimple (rsMaybeUnavail . networkGateway), QffNormal)
+  , (FieldDefinition "network6" "IPv6Subnet" QFTOther "IPv6 subnet",
+     FieldSimple (rsMaybeUnavail . networkNetwork6), QffNormal)
+  , (FieldDefinition "gateway6" "IPv6Gateway" QFTOther "IPv6 gateway",
+     FieldSimple (rsMaybeUnavail . networkGateway6), QffNormal)
+  , (FieldDefinition "mac_prefix" "MacPrefix" QFTOther "MAC address prefix",
+     FieldSimple (rsMaybeUnavail . networkMacPrefix), QffNormal)
+  , (FieldDefinition "free_count" "FreeCount" QFTOther "Number of free IPs",
+     FieldSimple (rsMaybeNoData . fmap getFreeCount . createAddressPool),
+     QffNormal)
+  , (FieldDefinition "map" "Map" QFTText "Map of the network's reserved IPs",
+     FieldSimple (rsMaybeNoData . fmap getMap . createAddressPool),
+     QffNormal)
+  , (FieldDefinition "reserved_count" "ReservedCount" QFTOther
+       "Number of reserved IPs",
+     FieldSimple (rsMaybeNoData . fmap getReservedCount . createAddressPool),
+     QffNormal)
+  , (FieldDefinition "group_list" "GroupList" QFTOther "List of node groups",
+     FieldConfig (\cfg -> rsNormal . getGroupConnections cfg . networkUuid),
+     QffNormal)
+  , (FieldDefinition "group_cnt" "GroupCount" QFTOther "Number of node groups",
+     FieldConfig (\cfg -> rsNormal . length . getGroupConnections cfg
+       . networkUuid), QffNormal)
+  , (FieldDefinition "inst_list" "InstanceList" QFTOther "List of instances",
+     FieldConfig (\cfg -> rsNormal . getInstances cfg . networkUuid),
+     QffNormal)
+  , (FieldDefinition "inst_cnt" "InstanceCount" QFTOther "Number of instances",
+     FieldConfig (\cfg -> rsNormal . length . getInstances cfg
+       . networkUuid), QffNormal)
+  ] ++
+  uuidFields "Network" ++
+  serialFields "Network" ++
+  tagsFields
+
+-- | The network fields map.
+networkFieldsMap :: FieldMap Network NetworkRuntime
+networkFieldsMap =
+  Map.fromList $ map (\v@(f, _, _) -> (fdefName f, v)) networkFields
+
+-- TODO: the external_reservations field is not implemented yet
+
+-- | Given a network's UUID, this function lists all connections from
+-- the network to nodegroups including the respective mode and links.
+getGroupConnections :: ConfigData -> String -> [(String, String, String)]
+getGroupConnections cfg network_uuid =
+  mapMaybe (getGroupConnection network_uuid)
+  ((Map.elems . fromContainer . configNodegroups) cfg)
+
+-- | Given a network's UUID and a node group, this function assembles
+-- a tuple of the group's name, the mode and the link by which the
+-- network is connected to the group. Returns 'Nothing' if the network
+-- is not connected to the group.
+getGroupConnection :: String -> NodeGroup -> Maybe (String, String, String)
+getGroupConnection network_uuid group =
+  let networks = fromContainer . groupNetworks $ group
+  in case Map.lookup network_uuid networks of
+    Nothing -> Nothing
+    Just net ->
+      Just (groupName group, getNicMode net, getNicLink net)
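+
+-- For a network connected in bridged mode over link @xen-br0@ to a group
+-- named @default@, this yields @Just ("default", "bridged", "xen-br0")@
+-- (values illustrative only; "-" stands in for an unset mode or link, see
+-- the helpers below).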
+
+-- | Retrieves the network's mode and formats it in a human-readable
+-- way, substituting "-" if it is not available.
+getNicMode :: PartialNicParams -> String
+getNicMode nic_params =
+  maybe "-" nICModeToRaw $ nicpModeP nic_params
+
+-- | Retrieves the network's link and formats it in a human-readable
+-- way, substituting "-" if it is not available.
+getNicLink :: PartialNicParams -> String
+getNicLink nic_params = fromMaybe "-" (nicpLinkP nic_params)
+
+-- | Retrieves the network's instances' names.
+getInstances :: ConfigData -> String -> [String]
+getInstances cfg network_uuid =
+  map instName (filter (instIsConnected cfg network_uuid)
+    ((Map.elems . fromContainer . configInstances) cfg))
+
+-- | Helper function that checks if an instance is linked to the given network.
+instIsConnected :: ConfigData -> String -> Instance -> Bool
+instIsConnected cfg network_uuid inst =
+  network_uuid `elem` mapMaybe (getNetworkUuid cfg)
+    (mapMaybe nicNetwork (instNics inst))
+
+-- | Helper function to look up a network's UUID by its name
+getNetworkUuid :: ConfigData -> String -> Maybe String
+getNetworkUuid cfg name =
+  let net = find (\n -> name == fromNonEmpty (networkName n))
+               ((Map.elems . fromContainer . configNetworks) cfg)
+  in fmap networkUuid net
index de6ea1a..53bf6d1 100644 (file)
@@ -158,13 +158,13 @@ nodeFields =
      QffNormal)
   , (FieldDefinition "group" "Group" QFTText "Node group",
      FieldConfig (\cfg node ->
-                    rsMaybe (groupName <$> getGroupOfNode cfg node)),
+                    rsMaybeNoData (groupName <$> getGroupOfNode cfg node)),
      QffNormal)
   , (FieldDefinition "group.uuid" "GroupUUID" QFTText "UUID of node group",
      FieldSimple (rsNormal . nodeGroup), QffNormal)
   ,  (FieldDefinition "ndparams" "NodeParameters" QFTOther
         "Merged node parameters",
-      FieldConfig ((rsMaybe .) . getNodeNdParams), QffNormal)
+      FieldConfig ((rsMaybeNoData .) . getNodeNdParams), QffNormal)
   , (FieldDefinition "custom_ndparams" "CustomNodeParameters" QFTOther
                        "Custom node parameters",
      FieldSimple (rsNormal . nodeNdparams), QffNormal)
index ffdebf8..515d6d4 100644 (file)
@@ -71,6 +71,7 @@ import Ganeti.Query.Filter
 import qualified Ganeti.Query.Job as Query.Job
 import Ganeti.Query.Group
 import Ganeti.Query.Language
+import Ganeti.Query.Network
 import Ganeti.Query.Node
 import Ganeti.Query.Types
 import Ganeti.Path
@@ -197,7 +198,22 @@ queryInner cfg _ (Query (ItemTypeOpCode QRGroup) fields qfilter) wanted =
   fgroups <- filterM (\n -> evaluateFilter cfg Nothing n cfilter) groups
   let fdata = map (\node ->
                        map (execGetter cfg GroupRuntime node) fgetters) fgroups
-  return QueryResult {qresFields = fdefs, qresData = fdata }
+  return QueryResult { qresFields = fdefs, qresData = fdata }
+
+queryInner cfg _ (Query (ItemTypeOpCode QRNetwork) fields qfilter) wanted =
+  return $ do
+  cfilter <- compileFilter networkFieldsMap qfilter
+  let selected = getSelectedFields networkFieldsMap fields
+      (fdefs, fgetters, _) = unzip3 selected
+  networks <- case wanted of
+                [] -> Ok . niceSortKey (fromNonEmpty . networkName) .
+                      Map.elems . fromContainer $ configNetworks cfg
+                _  -> mapM (getNetwork cfg) wanted
+  fnetworks <- filterM (\n -> evaluateFilter cfg Nothing n cfilter) networks
+  let fdata = map (\network ->
+                   map (execGetter cfg NetworkRuntime network) fgetters)
+                   fnetworks
+  return QueryResult { qresFields = fdefs, qresData = fdata }
 
 queryInner _ _ (Query qkind _ _) _ =
   return . Bad . GenericError $ "Query '" ++ show qkind ++ "' not supported"
index 9f02173..0f3a8e1 100644 (file)
@@ -359,7 +359,7 @@ $(THH.declareSADT "IAllocatorMode"
   ])
 $(THH.makeJSONInstance ''IAllocatorMode)
 
--- | Netork mode.
+-- | Network mode.
 $(THH.declareSADT "NICMode"
   [ ("NMBridged", 'C.nicModeBridged)
   , ("NMRouted",  'C.nicModeRouted)
index 18768ee..0ff1984 100644 (file)
@@ -320,19 +320,19 @@ clockTimeToString (TOD t _) = show t
 (which is assumed to be a separator) to be absent from the string if the string
 terminates there.
 
->>> chompPrefix "foo:bar:" "a:b:c"
+\>>> chompPrefix \"foo:bar:\" \"a:b:c\"
 Nothing
 
->>> chompPrefix "foo:bar:" "foo:bar:baz"
-Just "baz"
+\>>> chompPrefix \"foo:bar:\" \"foo:bar:baz\"
+Just \"baz\"
 
->>> chompPrefix "foo:bar:" "foo:bar:"
-Just ""
+\>>> chompPrefix \"foo:bar:\" \"foo:bar:\"
+Just \"\"
 
->>> chompPrefix "foo:bar:" "foo:bar"
-Just ""
+\>>> chompPrefix \"foo:bar:\" \"foo:bar\"
+Just \"\"
 
->>> chompPrefix "foo:bar:" "foo:barbaz"
+\>>> chompPrefix \"foo:bar:\" \"foo:barbaz\"
 Nothing
 -}
 chompPrefix :: String -> String -> Maybe String
diff --git a/test/data/qa-minimal-nodes-instances-only.json b/test/data/qa-minimal-nodes-instances-only.json
new file mode 100644 (file)
index 0000000..38b51b5
--- /dev/null
@@ -0,0 +1,46 @@
+{
+  "name": "xen-test-qa-minimal-nodes-instances-only",
+
+  "disk": ["1G", "512M"],
+  "disk-growth": ["2G", "768M"],
+
+  "nodes": [
+    {
+      "# Master node": null,
+      "primary": "xen-test-0",
+      "secondary": "192.0.2.1"
+    },
+
+    {
+      "primary": "xen-test-1",
+      "secondary": "192.0.2.2"
+    },
+
+    {
+      "primary": "xen-test-2",
+      "secondary": "192.0.2.3"
+    },
+
+    {
+      "primary": "xen-test-3",
+      "secondary": "192.0.2.4"
+    }
+  ],
+
+  "instances": [
+    {
+      "name": "xen-test-inst1",
+      "nic.mac/0": "AA:00:00:11:11:11"
+    },
+    {
+      "name": "xen-test-inst2",
+      "nic.mac/0": "AA:00:00:22:22:22"
+    }
+  ],
+
+  "tests": {
+    "default": false
+  },
+
+  "# vim: set syntax=javascript :": null
+}
diff --git a/test/data/xen-xm-info-4.0.1.txt b/test/data/xen-xm-info-4.0.1.txt
new file mode 100644 (file)
index 0000000..5a678b4
--- /dev/null
@@ -0,0 +1,31 @@
+host                   : host.example.com
+release                : 3.2.0
+version                : #1 SMP Tue Jan  1 00:00:00 UTC 2013
+machine                : x86_64
+nr_cpus                : 4
+nr_nodes               : 1
+cores_per_socket       : 2
+threads_per_core       : 1
+cpu_mhz                : 2800
+hw_caps                : bfebfbff:20100800:00000000:00000940:0004e3bd:00000000:00000001:00000000
+virt_caps              : 
+total_memory           : 16378
+free_memory            : 8004
+node_to_cpu            : node0:0-3
+node_to_memory         : node0:8004
+node_to_dma32_mem      : node0:2985
+max_node_id            : 0
+xen_major              : 4
+xen_minor              : 0
+xen_extra              : .1
+xen_caps               : xen-3.0-x86_64 xen-3.0-x86_32p 
+xen_scheduler          : credit
+xen_pagesize           : 4096
+platform_params        : virt_start=0xffff800000000000
+xen_changeset          : unavailable
+xen_commandline        : placeholder dom0_mem=1024M com1=115200,8n1 console=com1
+cc_compiler            : gcc version 4.4.5 (Debian 4.4.5-8) 
+cc_compile_by          : user
+cc_compile_domain      : example.com
+cc_compile_date        : Tue Jan  1 00:00:00 UTC 2013
+xend_config_format     : 4
diff --git a/test/data/xen-xm-list-4.0.1-dom0-only.txt b/test/data/xen-xm-list-4.0.1-dom0-only.txt
new file mode 100644 (file)
index 0000000..2a022fe
--- /dev/null
@@ -0,0 +1,2 @@
+Name                                        ID   Mem VCPUs      State   Time(s)
+Domain-0                                     0  1023     1     r----- 121152.6
diff --git a/test/data/xen-xm-list-4.0.1-four-instances.txt b/test/data/xen-xm-list-4.0.1-four-instances.txt
new file mode 100644 (file)
index 0000000..05f500e
--- /dev/null
@@ -0,0 +1,5 @@
+Name                                        ID   Mem VCPUs      State   Time(s)
+Domain-0                                     0  1023     1     r----- 154706.1
+server01.example.com                         1  1024     1     -b---- 167643.2
+web3106215069.example.com                    3  4096     1     -b---- 466690.9
+testinstance.example.com                     2  2048     2     r----- 244443.0
index 0d25fb5..b9a505f 100644 (file)
@@ -138,6 +138,9 @@ case_formatCommands =
           [ " hail    - Ganeti IAllocator plugin that implements the instance\
             \ placement and"
           , "           movement using the same algorithm as hbal(1)"
+          , " harep   - auto-repair tool that detects certain kind of problems\
+            \ with instances"
+          , "           and applies the allowed set of solutions"
           , " hbal    - cluster balancer that looks at the current state of\
             \ the cluster and"
           , "           computes a series of steps designed to bring the\
index 65d7659..dc619ec 100644 (file)
@@ -174,8 +174,9 @@ genValidNetwork = do
   gateway6 <- genMaybe genIp6Addr
   res <- liftM Just (genBitString $ netmask2NumHosts netmask)
   ext_res <- liftM Just (genBitString $ netmask2NumHosts netmask)
+  uuid <- arbitrary
   let n = Network name mac_prefix net net6 gateway
-          gateway6 res ext_res 0 Set.empty
+          gateway6 res ext_res uuid 0 Set.empty
   return n
 
 -- | Generate an arbitrary string consisting of '0' and '1' of the given length.
@@ -206,11 +207,13 @@ genEmptyCluster ncount = do
                                 show (map fst nodes'))
                     else GenericContainer nodemap
       continsts = GenericContainer Map.empty
+      networks = GenericContainer Map.empty
   grp <- arbitrary
   let contgroups = GenericContainer $ Map.singleton guuid grp
   serial <- arbitrary
   cluster <- resize 8 arbitrary
-  let c = ConfigData version cluster contnodes contgroups continsts serial
+  let c = ConfigData version cluster contnodes contgroups continsts networks
+            serial
   return c
 
 -- * Test properties
index 3723e04..9b7edcd 100755 (executable)
@@ -90,6 +90,14 @@ class TestConstants(unittest.TestCase):
     self.assertFalse(constants.JOBS_PENDING - constants.JOB_STATUS_ALL)
     self.assertFalse(constants.JOBS_FINALIZED - constants.JOB_STATUS_ALL)
 
+  def testDefaultsForAllHypervisors(self):
+    self.assertEqual(frozenset(constants.HVC_DEFAULTS.keys()),
+                     constants.HYPER_TYPES)
+
+  def testDefaultHypervisor(self):
+    self.assertTrue(constants.DEFAULT_ENABLED_HYPERVISOR in
+                    constants.HYPER_TYPES)
+
 
 class TestExportedNames(unittest.TestCase):
   _VALID_NAME_RE = re.compile(r"^[A-Z][A-Z0-9_]+$")
index b7816f2..c21c749 100755 (executable)
@@ -26,6 +26,7 @@ import tempfile
 import unittest
 import socket
 import os
+import struct
 
 from ganeti import serializer
 from ganeti import constants
@@ -315,5 +316,55 @@ class TestHelpRegexps(testutils.GanetiTestCase):
     self.assertFalse(boot_re.search(help_01590))
 
 
+class TestGetTunFeatures(unittest.TestCase):
+  def testWrongIoctl(self):
+    tmpfile = tempfile.NamedTemporaryFile()
+    # A file does not have the right ioctls, so this must always fail
+    result = hv_kvm._GetTunFeatures(tmpfile.fileno())
+    self.assertTrue(result is None)
+
+  def _FakeIoctl(self, features, fd, request, buf):
+    self.assertEqual(request, hv_kvm.TUNGETFEATURES)
+
+    (reqno, ) = struct.unpack("I", buf)
+    self.assertEqual(reqno, 0)
+
+    return struct.pack("I", features)
+
+  def test(self):
+    tmpfile = tempfile.NamedTemporaryFile()
+    fd = tmpfile.fileno()
+
+    for features in [0, hv_kvm.IFF_VNET_HDR]:
+      fn = compat.partial(self._FakeIoctl, features)
+      result = hv_kvm._GetTunFeatures(fd, _ioctl=fn)
+      self.assertEqual(result, features)
+
+
+class TestProbeTapVnetHdr(unittest.TestCase):
+  def _FakeTunFeatures(self, expected_fd, flags, fd):
+    self.assertEqual(fd, expected_fd)
+    return flags
+
+  def test(self):
+    tmpfile = tempfile.NamedTemporaryFile()
+    fd = tmpfile.fileno()
+
+    for flags in [0, hv_kvm.IFF_VNET_HDR]:
+      fn = compat.partial(self._FakeTunFeatures, fd, flags)
+
+      result = hv_kvm._ProbeTapVnetHdr(fd, _features_fn=fn)
+      if flags == 0:
+        self.assertFalse(result)
+      else:
+        self.assertTrue(result)
+
+  def testUnsupported(self):
+    tmpfile = tempfile.NamedTemporaryFile()
+    fd = tmpfile.fileno()
+
+    self.assertFalse(hv_kvm._ProbeTapVnetHdr(fd, _features_fn=lambda _: None))
+
+
 if __name__ == "__main__":
   testutils.GanetiTestProgram()
index 824a562..057ac76 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 #
 
-# Copyright (C) 2011 Google Inc.
+# Copyright (C) 2011, 2013 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 
 """Script for testing ganeti.hypervisor.hv_lxc"""
 
+import string # pylint: disable=W0402
 import unittest
+import tempfile
+import shutil
+import random
+import os
 
 from ganeti import constants
 from ganeti import objects
 from ganeti import hypervisor
+from ganeti import utils
+from ganeti import errors
+from ganeti import compat
 
 from ganeti.hypervisor import hv_xen
 
 import testutils
 
 
+# Map from hypervisor class to hypervisor name
+HVCLASS_TO_HVNAME = utils.InvertDict(hypervisor._HYPERVISOR_MAP)
+
+
 class TestConsole(unittest.TestCase):
   def test(self):
     for cls in [hv_xen.XenPvmHypervisor, hv_xen.XenHvmHypervisor]:
@@ -63,5 +75,683 @@ class TestCreateConfigCpus(unittest.TestCase):
                       constants.CPU_PINNING_ALL_XEN))
 
 
+class TestParseXmList(testutils.GanetiTestCase):
+  def test(self):
+    data = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt")
+
+    # Exclude node
+    self.assertEqual(hv_xen._ParseXmList(data.splitlines(), False), [])
+
+    # Include node
+    result = hv_xen._ParseXmList(data.splitlines(), True)
+    self.assertEqual(len(result), 1)
+    self.assertEqual(len(result[0]), 6)
+
+    # Name
+    self.assertEqual(result[0][0], hv_xen._DOM0_NAME)
+
+    # ID
+    self.assertEqual(result[0][1], 0)
+
+    # Memory
+    self.assertEqual(result[0][2], 1023)
+
+    # VCPUs
+    self.assertEqual(result[0][3], 1)
+
+    # State
+    self.assertEqual(result[0][4], "r-----")
+
+    # Time
+    self.assertAlmostEqual(result[0][5], 121152.6)
+
+  def testWrongLineFormat(self):
+    tests = [
+      ["three fields only"],
+      ["name InvalidID 128 1 r----- 12345"],
+      ]
+
+    for lines in tests:
+      try:
+        hv_xen._ParseXmList(["Header would be here"] + lines, False)
+      except errors.HypervisorError, err:
+        self.assertTrue("Can't parse output of xm list" in str(err))
+      else:
+        self.fail("Exception was not raised")
+
+
+class TestGetXmList(testutils.GanetiTestCase):
+  def _Fail(self):
+    return utils.RunResult(constants.EXIT_FAILURE, None,
+                           "stdout", "stderr", None,
+                           NotImplemented, NotImplemented)
+
+  def testTimeout(self):
+    fn = testutils.CallCounter(self._Fail)
+    try:
+      hv_xen._GetXmList(fn, False, _timeout=0.1)
+    except errors.HypervisorError, err:
+      self.assertTrue("timeout exceeded" in str(err))
+    else:
+      self.fail("Exception was not raised")
+
+    self.assertTrue(fn.Count() < 10,
+                    msg="'xm list' was called too many times")
+
+  def _Success(self, stdout):
+    return utils.RunResult(constants.EXIT_SUCCESS, None, stdout, "", None,
+                           NotImplemented, NotImplemented)
+
+  def testSuccess(self):
+    data = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt")
+
+    fn = testutils.CallCounter(compat.partial(self._Success, data))
+
+    result = hv_xen._GetXmList(fn, True, _timeout=0.1)
+
+    self.assertEqual(len(result), 4)
+
+    self.assertEqual(map(compat.fst, result), [
+      "Domain-0",
+      "server01.example.com",
+      "web3106215069.example.com",
+      "testinstance.example.com",
+      ])
+
+    self.assertEqual(fn.Count(), 1)
+
+
+class TestParseNodeInfo(testutils.GanetiTestCase):
+  def testEmpty(self):
+    self.assertEqual(hv_xen._ParseNodeInfo(""), {})
+
+  def testUnknownInput(self):
+    data = "\n".join([
+      "foo bar",
+      "something else goes",
+      "here",
+      ])
+    self.assertEqual(hv_xen._ParseNodeInfo(data), {})
+
+  def testBasicInfo(self):
+    data = testutils.ReadTestData("xen-xm-info-4.0.1.txt")
+    result = hv_xen._ParseNodeInfo(data)
+    self.assertEqual(result, {
+      "cpu_nodes": 1,
+      "cpu_sockets": 2,
+      "cpu_total": 4,
+      "hv_version": (4, 0),
+      "memory_free": 8004,
+      "memory_total": 16378,
+      })
+
+
+class TestMergeInstanceInfo(testutils.GanetiTestCase):
+  def testEmpty(self):
+    self.assertEqual(hv_xen._MergeInstanceInfo({}, lambda _: []), {})
+
+  def _FakeXmList(self, include_node):
+    self.assertTrue(include_node)
+    return [
+      (hv_xen._DOM0_NAME, NotImplemented, 4096, 7, NotImplemented,
+       NotImplemented),
+      ("inst1.example.com", NotImplemented, 2048, 4, NotImplemented,
+       NotImplemented),
+      ]
+
+  def testMissingNodeInfo(self):
+    result = hv_xen._MergeInstanceInfo({}, self._FakeXmList)
+    self.assertEqual(result, {
+      "memory_dom0": 4096,
+      "dom0_cpus": 7,
+      })
+
+  def testWithNodeInfo(self):
+    info = testutils.ReadTestData("xen-xm-info-4.0.1.txt")
+    result = hv_xen._GetNodeInfo(info, self._FakeXmList)
+    self.assertEqual(result, {
+      "cpu_nodes": 1,
+      "cpu_sockets": 2,
+      "cpu_total": 4,
+      "dom0_cpus": 7,
+      "hv_version": (4, 0),
+      "memory_dom0": 4096,
+      "memory_free": 8004,
+      "memory_hv": 2230,
+      "memory_total": 16378,
+      })
+
+
+class TestGetConfigFileDiskData(unittest.TestCase):
+  def testLetterCount(self):
+    self.assertEqual(len(hv_xen._DISK_LETTERS), 26)
+
+  def testNoDisks(self):
+    self.assertEqual(hv_xen._GetConfigFileDiskData([], "hd"), [])
+
+  def testManyDisks(self):
+    for offset in [0, 1, 10]:
+      disks = [(objects.Disk(dev_type=constants.LD_LV), "/tmp/disk/%s" % idx)
+               for idx in range(len(hv_xen._DISK_LETTERS) + offset)]
+
+      if offset == 0:
+        result = hv_xen._GetConfigFileDiskData(disks, "hd")
+        self.assertEqual(result, [
+          "'phy:/tmp/disk/%s,hd%s,r'" % (idx, string.ascii_lowercase[idx])
+          for idx in range(len(hv_xen._DISK_LETTERS) + offset)
+          ])
+      else:
+        try:
+          hv_xen._GetConfigFileDiskData(disks, "hd")
+        except errors.HypervisorError, err:
+          self.assertEqual(str(err), "Too many disks")
+        else:
+          self.fail("Exception was not raised")
+
+  def testTwoLvDisksWithMode(self):
+    disks = [
+      (objects.Disk(dev_type=constants.LD_LV, mode=constants.DISK_RDWR),
+       "/tmp/diskFirst"),
+      (objects.Disk(dev_type=constants.LD_LV, mode=constants.DISK_RDONLY),
+       "/tmp/diskLast"),
+      ]
+
+    result = hv_xen._GetConfigFileDiskData(disks, "hd")
+    self.assertEqual(result, [
+      "'phy:/tmp/diskFirst,hda,w'",
+      "'phy:/tmp/diskLast,hdb,r'",
+      ])
+
+  def testFileDisks(self):
+    disks = [
+      (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDWR,
+                    physical_id=[constants.FD_LOOP]),
+       "/tmp/diskFirst"),
+      (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDONLY,
+                    physical_id=[constants.FD_BLKTAP]),
+       "/tmp/diskTwo"),
+      (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDWR,
+                    physical_id=[constants.FD_LOOP]),
+       "/tmp/diskThree"),
+      (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDWR,
+                    physical_id=[constants.FD_BLKTAP]),
+       "/tmp/diskLast"),
+      ]
+
+    result = hv_xen._GetConfigFileDiskData(disks, "sd")
+    self.assertEqual(result, [
+      "'file:/tmp/diskFirst,sda,w'",
+      "'tap:aio:/tmp/diskTwo,sdb,r'",
+      "'file:/tmp/diskThree,sdc,w'",
+      "'tap:aio:/tmp/diskLast,sdd,w'",
+      ])
+
+  def testInvalidFileDisk(self):
+    disks = [
+      (objects.Disk(dev_type=constants.LD_FILE, mode=constants.DISK_RDWR,
+                    physical_id=["#unknown#"]),
+       "/tmp/diskinvalid"),
+      ]
+
+    self.assertRaises(KeyError, hv_xen._GetConfigFileDiskData, disks, "sd")
+
+
+class TestXenHypervisorUnknownCommand(unittest.TestCase):
+  def test(self):
+    cmd = "#unknown command#"
+    self.assertFalse(cmd in constants.KNOWN_XEN_COMMANDS)
+    hv = hv_xen.XenHypervisor(_cfgdir=NotImplemented,
+                              _run_cmd_fn=NotImplemented,
+                              _cmd=cmd)
+    self.assertRaises(errors.ProgrammerError, hv._RunXen, [])
+
+
+class TestXenHypervisorWriteConfigFile(unittest.TestCase):
+  def setUp(self):
+    self.tmpdir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    shutil.rmtree(self.tmpdir)
+
+  def testWriteError(self):
+    cfgdir = utils.PathJoin(self.tmpdir, "foobar")
+
+    hv = hv_xen.XenHypervisor(_cfgdir=cfgdir,
+                              _run_cmd_fn=NotImplemented,
+                              _cmd=NotImplemented)
+
+    self.assertFalse(os.path.exists(cfgdir))
+
+    try:
+      hv._WriteConfigFile("name", "data")
+    except errors.HypervisorError, err:
+      self.assertTrue(str(err).startswith("Cannot write Xen instance"))
+    else:
+      self.fail("Exception was not raised")
+
+
+class _TestXenHypervisor(object):
+  TARGET = NotImplemented
+  CMD = NotImplemented
+  HVNAME = NotImplemented
+
+  def setUp(self):
+    super(_TestXenHypervisor, self).setUp()
+
+    self.tmpdir = tempfile.mkdtemp()
+
+    self.vncpw = "".join(random.sample(string.ascii_letters, 10))
+
+    self.vncpw_path = utils.PathJoin(self.tmpdir, "vncpw")
+    utils.WriteFile(self.vncpw_path, data=self.vncpw)
+
+  def tearDown(self):
+    super(_TestXenHypervisor, self).tearDown()
+
+    shutil.rmtree(self.tmpdir)
+
+  def _GetHv(self, run_cmd=NotImplemented):
+    return self.TARGET(_cfgdir=self.tmpdir, _run_cmd_fn=run_cmd, _cmd=self.CMD)
+
+  def _SuccessCommand(self, stdout, cmd):
+    self.assertEqual(cmd[0], self.CMD)
+
+    return utils.RunResult(constants.EXIT_SUCCESS, None, stdout, "", None,
+                           NotImplemented, NotImplemented)
+
+  def _FailingCommand(self, cmd):
+    self.assertEqual(cmd[0], self.CMD)
+
+    return utils.RunResult(constants.EXIT_FAILURE, None,
+                           "", "This command failed", None,
+                           NotImplemented, NotImplemented)
+
+  def _FakeTcpPing(self, expected, result, target, port, **kwargs):
+    self.assertEqual((target, port), expected)
+    return result
+
+  def testReadingNonExistentConfigFile(self):
+    hv = self._GetHv()
+
+    try:
+      hv._ReadConfigFile("inst15780.example.com")
+    except errors.HypervisorError, err:
+      self.assertTrue(str(err).startswith("Failed to load Xen config file:"))
+    else:
+      self.fail("Exception was not raised")
+
+  def testRemovingAutoConfigFile(self):
+    name = "inst8206.example.com"
+    cfgfile = utils.PathJoin(self.tmpdir, name)
+    autodir = utils.PathJoin(self.tmpdir, "auto")
+    autocfgfile = utils.PathJoin(autodir, name)
+
+    os.mkdir(autodir)
+
+    utils.WriteFile(autocfgfile, data="")
+
+    hv = self._GetHv()
+
+    self.assertTrue(os.path.isfile(autocfgfile))
+    hv._WriteConfigFile(name, "content")
+    self.assertFalse(os.path.exists(autocfgfile))
+    self.assertEqual(utils.ReadFile(cfgfile), "content")
+
+  def _XenList(self, cmd):
+    self.assertEqual(cmd, [self.CMD, "list"])
+
+    # TODO: Use actual data from "xl" command
+    output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt")
+
+    return self._SuccessCommand(output, cmd)
+
+  def testGetInstanceInfo(self):
+    hv = self._GetHv(run_cmd=self._XenList)
+
+    (name, instid, memory, vcpus, state, runtime) = \
+      hv.GetInstanceInfo("server01.example.com")
+
+    self.assertEqual(name, "server01.example.com")
+    self.assertEqual(instid, 1)
+    self.assertEqual(memory, 1024)
+    self.assertEqual(vcpus, 1)
+    self.assertEqual(state, "-b----")
+    self.assertAlmostEqual(runtime, 167643.2)
+
+  def testGetInstanceInfoDom0(self):
+    hv = self._GetHv(run_cmd=self._XenList)
+
+    # TODO: Not sure if this is actually used anywhere (can't find it), but the
+    # code supports querying for Dom0
+    (name, instid, memory, vcpus, state, runtime) = \
+      hv.GetInstanceInfo(hv_xen._DOM0_NAME)
+
+    self.assertEqual(name, "Domain-0")
+    self.assertEqual(instid, 0)
+    self.assertEqual(memory, 1023)
+    self.assertEqual(vcpus, 1)
+    self.assertEqual(state, "r-----")
+    self.assertAlmostEqual(runtime, 154706.1)
+
+  def testGetInstanceInfoUnknown(self):
+    hv = self._GetHv(run_cmd=self._XenList)
+
+    result = hv.GetInstanceInfo("unknown.example.com")
+    self.assertTrue(result is None)
+
+  def testGetAllInstancesInfo(self):
+    hv = self._GetHv(run_cmd=self._XenList)
+
+    result = hv.GetAllInstancesInfo()
+
+    self.assertEqual(map(compat.fst, result), [
+      "server01.example.com",
+      "web3106215069.example.com",
+      "testinstance.example.com",
+      ])
+
+  def testListInstances(self):
+    hv = self._GetHv(run_cmd=self._XenList)
+
+    self.assertEqual(hv.ListInstances(), [
+      "server01.example.com",
+      "web3106215069.example.com",
+      "testinstance.example.com",
+      ])
+
+  def testVerify(self):
+    output = testutils.ReadTestData("xen-xm-info-4.0.1.txt")
+    hv = self._GetHv(run_cmd=compat.partial(self._SuccessCommand,
+                                            output))
+    self.assertTrue(hv.Verify() is None)
+
+  def testVerifyFailing(self):
+    hv = self._GetHv(run_cmd=self._FailingCommand)
+    self.assertTrue("failed:" in hv.Verify())
+
+  def _StartInstanceCommand(self, inst, paused, failcreate, cmd):
+    if cmd == [self.CMD, "info"]:
+      output = testutils.ReadTestData("xen-xm-info-4.0.1.txt")
+    elif cmd == [self.CMD, "list"]:
+      output = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt")
+    elif cmd[:2] == [self.CMD, "create"]:
+      args = cmd[2:]
+      cfgfile = utils.PathJoin(self.tmpdir, inst.name)
+
+      if paused:
+        self.assertEqual(args, ["-p", cfgfile])
+      else:
+        self.assertEqual(args, [cfgfile])
+
+      if failcreate:
+        return self._FailingCommand(cmd)
+
+      output = ""
+    else:
+      self.fail("Unhandled command: %s" % (cmd, ))
+
+    return self._SuccessCommand(output, cmd)
+
+  def _MakeInstance(self):
+    # Copy default parameters
+    bep = objects.FillDict(constants.BEC_DEFAULTS, {})
+    hvp = objects.FillDict(constants.HVC_DEFAULTS[self.HVNAME], {})
+
+    # Override default VNC password file path
+    if constants.HV_VNC_PASSWORD_FILE in hvp:
+      hvp[constants.HV_VNC_PASSWORD_FILE] = self.vncpw_path
+
+    disks = [
+      (objects.Disk(dev_type=constants.LD_LV, mode=constants.DISK_RDWR),
+       utils.PathJoin(self.tmpdir, "disk0")),
+      (objects.Disk(dev_type=constants.LD_LV, mode=constants.DISK_RDONLY),
+       utils.PathJoin(self.tmpdir, "disk1")),
+      ]
+
+    inst = objects.Instance(name="server01.example.com",
+                            hvparams=hvp, beparams=bep,
+                            osparams={}, nics=[], os="deb1",
+                            disks=map(compat.fst, disks))
+    inst.UpgradeConfig()
+
+    return (inst, disks)
+
+  def testStartInstance(self):
+    (inst, disks) = self._MakeInstance()
+
+    for failcreate in [False, True]:
+      for paused in [False, True]:
+        run_cmd = compat.partial(self._StartInstanceCommand,
+                                 inst, paused, failcreate)
+
+        hv = self._GetHv(run_cmd=run_cmd)
+
+        # Ensure instance is not listed
+        self.assertTrue(inst.name not in hv.ListInstances())
+
+        # Remove configuration
+        cfgfile = utils.PathJoin(self.tmpdir, inst.name)
+        utils.RemoveFile(cfgfile)
+
+        if failcreate:
+          self.assertRaises(errors.HypervisorError, hv.StartInstance,
+                            inst, disks, paused)
+        else:
+          hv.StartInstance(inst, disks, paused)
+
+        # Check if configuration was updated
+        lines = utils.ReadFile(cfgfile).splitlines()
+
+        if constants.HV_VNC_PASSWORD_FILE in inst.hvparams:
+          self.assertTrue(("vncpasswd = '%s'" % self.vncpw) in lines)
+        else:
+          extra = inst.hvparams[constants.HV_KERNEL_ARGS]
+          self.assertTrue(("extra = '%s'" % extra) in lines)
+
+  def _StopInstanceCommand(self, instance_name, force, fail, cmd):
+    if ((force and cmd[:2] == [self.CMD, "destroy"]) or
+        (not force and cmd[:2] == [self.CMD, "shutdown"])):
+      self.assertEqual(cmd[2:], [instance_name])
+      output = ""
+    else:
+      self.fail("Unhandled command: %s" % (cmd, ))
+
+    if fail:
+      # Simulate a failing command
+      return self._FailingCommand(cmd)
+    else:
+      return self._SuccessCommand(output, cmd)
+
+  def testStopInstance(self):
+    name = "inst4284.example.com"
+    cfgfile = utils.PathJoin(self.tmpdir, name)
+    cfgdata = "config file content\n"
+
+    for force in [False, True]:
+      for fail in [False, True]:
+        utils.WriteFile(cfgfile, data=cfgdata)
+
+        run_cmd = compat.partial(self._StopInstanceCommand, name, force, fail)
+
+        hv = self._GetHv(run_cmd=run_cmd)
+
+        self.assertTrue(os.path.isfile(cfgfile))
+
+        if fail:
+          try:
+            hv._StopInstance(name, force)
+          except errors.HypervisorError, err:
+            self.assertTrue(str(err).startswith("Failed to stop instance"))
+          else:
+            self.fail("Exception was not raised")
+          self.assertEqual(utils.ReadFile(cfgfile), cfgdata,
+                           msg=("Configuration was removed when stopping"
+                                " instance failed"))
+        else:
+          hv._StopInstance(name, force)
+          self.assertFalse(os.path.exists(cfgfile))
+
+  def _MigrateNonRunningInstCmd(self, cmd):
+    if cmd == [self.CMD, "list"]:
+      output = testutils.ReadTestData("xen-xm-list-4.0.1-dom0-only.txt")
+    else:
+      self.fail("Unhandled command: %s" % (cmd, ))
+
+    return self._SuccessCommand(output, cmd)
+
+  def testMigrateInstanceNotRunning(self):
+    name = "nonexistinginstance.example.com"
+    target = constants.IP4_ADDRESS_LOCALHOST
+    port = 14618
+
+    hv = self._GetHv(run_cmd=self._MigrateNonRunningInstCmd)
+
+    for live in [False, True]:
+      try:
+        hv._MigrateInstance(NotImplemented, name, target, port, live,
+                            _ping_fn=NotImplemented)
+      except errors.HypervisorError, err:
+        self.assertEqual(str(err), "Instance not running, cannot migrate")
+      else:
+        self.fail("Exception was not raised")
+
+  def _MigrateInstTargetUnreachCmd(self, cmd):
+    if cmd == [self.CMD, "list"]:
+      output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt")
+    else:
+      self.fail("Unhandled command: %s" % (cmd, ))
+
+    return self._SuccessCommand(output, cmd)
+
+  def testMigrateTargetUnreachable(self):
+    name = "server01.example.com"
+    target = constants.IP4_ADDRESS_LOCALHOST
+    port = 28349
+
+    hv = self._GetHv(run_cmd=self._MigrateInstTargetUnreachCmd)
+
+    for live in [False, True]:
+      if self.CMD == constants.XEN_CMD_XL:
+        # TODO: Detect unreachable targets
+        pass
+      else:
+        try:
+          hv._MigrateInstance(NotImplemented, name, target, port, live,
+                              _ping_fn=compat.partial(self._FakeTcpPing,
+                                                      (target, port), False))
+        except errors.HypervisorError, err:
+          wanted = "Remote host %s not" % target
+          self.assertTrue(str(err).startswith(wanted))
+        else:
+          self.fail("Exception was not raised")
+
+  def _MigrateInstanceCmd(self, cluster_name, instance_name, target, port,
+                          live, fail, cmd):
+    if cmd == [self.CMD, "list"]:
+      output = testutils.ReadTestData("xen-xm-list-4.0.1-four-instances.txt")
+    elif cmd[:2] == [self.CMD, "migrate"]:
+      if self.CMD == constants.XEN_CMD_XM:
+        args = ["-p", str(port)]
+
+        if live:
+          args.append("-l")
+
+      elif self.CMD == constants.XEN_CMD_XL:
+        args = [
+          "-s", constants.XL_SSH_CMD % cluster_name,
+          "-C", utils.PathJoin(self.tmpdir, instance_name),
+          ]
+
+      else:
+        self.fail("Unknown Xen command '%s'" % self.CMD)
+
+      args.extend([instance_name, target])
+      self.assertEqual(cmd[2:], args)
+
+      if fail:
+        return self._FailingCommand(cmd)
+
+      output = ""
+    else:
+      self.fail("Unhandled command: %s" % (cmd, ))
+
+    return self._SuccessCommand(output, cmd)
+
+  def testMigrateInstance(self):
+    clustername = "cluster.example.com"
+    instname = "server01.example.com"
+    target = constants.IP4_ADDRESS_LOCALHOST
+    port = 22364
+
+    for live in [False, True]:
+      for fail in [False, True]:
+        ping_fn = \
+          testutils.CallCounter(compat.partial(self._FakeTcpPing,
+                                               (target, port), True))
+
+        run_cmd = \
+          compat.partial(self._MigrateInstanceCmd,
+                         clustername, instname, target, port, live,
+                         fail)
+
+        hv = self._GetHv(run_cmd=run_cmd)
+
+        if fail:
+          try:
+            hv._MigrateInstance(clustername, instname, target, port, live,
+                                _ping_fn=ping_fn)
+          except errors.HypervisorError, err:
+            self.assertTrue(str(err).startswith("Failed to migrate instance"))
+          else:
+            self.fail("Exception was not raised")
+        else:
+          hv._MigrateInstance(clustername, instname, target, port, live,
+                              _ping_fn=ping_fn)
+
+        if self.CMD == constants.XEN_CMD_XM:
+          expected_pings = 1
+        else:
+          expected_pings = 0
+
+        self.assertEqual(ping_fn.Count(), expected_pings)
+
+
+def _MakeTestClass(cls, cmd):
+  """Makes a class for testing.
+
+  The returned class has structure as shown in the following pseudo code:
+
+    class Test{cls.__name__}{cmd}(_TestXenHypervisor, unittest.TestCase):
+      TARGET = {cls}
+      CMD = {cmd}
+      HVNAME = {Hypervisor name retrieved using class}
+
+  @type cls: class
+  @param cls: Hypervisor class to be tested
+  @type cmd: string
+  @param cmd: Hypervisor command
+  @rtype: tuple
+  @return: Class name and class object (not instance)
+
+  """
+  name = "Test%sCmd%s" % (cls.__name__, cmd.title())
+  bases = (_TestXenHypervisor, unittest.TestCase)
+  hvname = HVCLASS_TO_HVNAME[cls]
+
+  return (name, type(name, bases, dict(TARGET=cls, CMD=cmd, HVNAME=hvname)))
+
+
+# Create test classes programmatically instead of manually to reduce the risk
+# of forgetting some combinations
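+# For example, with the "xm" and "xl" commands this produces classes such as
+# TestXenPvmHypervisorCmdXm and TestXenHvmHypervisorCmdXl.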
+for cls in [hv_xen.XenPvmHypervisor, hv_xen.XenHvmHypervisor]:
+  for cmd in constants.KNOWN_XEN_COMMANDS:
+    (name, testcls) = _MakeTestClass(cls, cmd)
+
+    assert name not in locals()
+
+    locals()[name] = testcls
+
+
 if __name__ == "__main__":
   testutils.GanetiTestProgram()
index 9c4595d..fc9a602 100755 (executable)
@@ -216,6 +216,37 @@ class TestClusterObject(unittest.TestCase):
     self.assertEqual(self.fake_cl.primary_hypervisor, constants.HT_CHROOT)
 
 
+class TestClusterObjectTcpUdpPortPool(unittest.TestCase):
+  def testNewCluster(self):
+    self.assertTrue(objects.Cluster().tcpudp_port_pool is None)
+
+  def testSerializingEmpty(self):
+    self.assertEqual(objects.Cluster().ToDict(), {
+      "tcpudp_port_pool": [],
+      })
+
+  def testSerializing(self):
+    cluster = objects.Cluster.FromDict({})
+    self.assertEqual(cluster.tcpudp_port_pool, set())
+
+    cluster.tcpudp_port_pool.add(3546)
+    cluster.tcpudp_port_pool.add(62511)
+
+    data = cluster.ToDict()
+    self.assertEqual(data.keys(), ["tcpudp_port_pool"])
+    self.assertEqual(sorted(data["tcpudp_port_pool"]), sorted([3546, 62511]))
+
+  def testDeserializingEmpty(self):
+    cluster = objects.Cluster.FromDict({})
+    self.assertEqual(cluster.tcpudp_port_pool, set())
+
+  def testDeserialize(self):
+    cluster = objects.Cluster.FromDict({
+      "tcpudp_port_pool": [26214, 10039, 267],
+      })
+    self.assertEqual(cluster.tcpudp_port_pool, set([26214, 10039, 267]))
+
+
 class TestOS(unittest.TestCase):
   ALL_DATA = [
     "debootstrap",
index 93f5212..22d2177 100755 (executable)
@@ -46,5 +46,62 @@ class TestAutoSlot(unittest.TestCase):
     slotted = AutoSlotted()
     self.assertEqual(slotted.__slots__, AutoSlotted.SLOTS)
 
+
+class TestContainerToDicts(unittest.TestCase):
+  def testUnknownType(self):
+    for value in [None, 19410, "xyz"]:
+      try:
+        outils.ContainerToDicts(value)
+      except TypeError, err:
+        self.assertTrue(str(err).startswith("Unknown container type"))
+      else:
+        self.fail("Exception was not raised")
+
+  def testEmptyDict(self):
+    value = {}
+    self.assertFalse(type(value) in outils._SEQUENCE_TYPES)
+    self.assertEqual(outils.ContainerToDicts(value), {})
+
+  def testEmptySequences(self):
+    for cls in [list, tuple, set, frozenset]:
+      self.assertEqual(outils.ContainerToDicts(cls()), [])
+
+
+class _FakeWithFromDict:
+  def FromDict(self, _):
+    raise NotImplementedError
+
+
+class TestContainerFromDicts(unittest.TestCase):
+  def testUnknownType(self):
+    for cls in [str, int, bool]:
+      try:
+        outils.ContainerFromDicts(None, cls, NotImplemented)
+      except TypeError, err:
+        self.assertTrue(str(err).startswith("Unknown container type"))
+      else:
+        self.fail("Exception was not raised")
+
+      try:
+        outils.ContainerFromDicts(None, cls(), NotImplemented)
+      except TypeError, err:
+        self.assertTrue(str(err).endswith("is not a type"))
+      else:
+        self.fail("Exception was not raised")
+
+  def testEmptyDict(self):
+    value = {}
+    self.assertFalse(type(value) in outils._SEQUENCE_TYPES)
+    self.assertEqual(outils.ContainerFromDicts(value, dict,
+                                                    NotImplemented),
+                     {})
+
+  def testEmptySequences(self):
+    for cls in [list, tuple, set, frozenset]:
+      self.assertEqual(outils.ContainerFromDicts([], cls,
+                                                      _FakeWithFromDict),
+                       cls())
+
+
 if __name__ == "__main__":
   testutils.GanetiTestProgram()
index e47b1f8..0f216da 100755 (executable)
@@ -108,10 +108,10 @@ class TestPreparePaths(_EnvVarTest):
 
 class TestMakeNodeRoot(unittest.TestCase):
   def test(self):
-    self.assertRaises(RuntimeError, vcluster._MakeNodeRoot, "/tmp", "/")
+    self.assertRaises(RuntimeError, vcluster.MakeNodeRoot, "/tmp", "/")
 
     for i in ["/tmp", "/tmp/", "/tmp///"]:
-      self.assertEqual(vcluster._MakeNodeRoot(i, "other.example.com"),
+      self.assertEqual(vcluster.MakeNodeRoot(i, "other.example.com"),
                        "/tmp/other.example.com")
 
 
index fd73322..70b5965 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 #
 
-# Copyright (C) 2012 Google Inc.
+# Copyright (C) 2012, 2013 Google Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 """Script for testing qa.qa_config"""
 
 import unittest
+import tempfile
+import shutil
+import os
+import operator
+
+from ganeti import utils
+from ganeti import serializer
+from ganeti import constants
+from ganeti import compat
 
 from qa import qa_config
+from qa import qa_error
 
 import testutils
 
@@ -133,5 +143,207 @@ class TestTestEnabled(unittest.TestCase):
         }))
 
 
+class TestQaConfigLoad(unittest.TestCase):
+  def setUp(self):
+    self.tmpdir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    shutil.rmtree(self.tmpdir)
+
+  def testLoadNonExistent(self):
+    filename = utils.PathJoin(self.tmpdir, "does.not.exist")
+    self.assertRaises(EnvironmentError, qa_config._QaConfig.Load, filename)
+
+  @staticmethod
+  def _WriteConfig(filename, data):
+    utils.WriteFile(filename, data=serializer.DumpJson(data))
+
+  def _CheckLoadError(self, filename, data, expected):
+    self._WriteConfig(filename, data)
+
+    try:
+      qa_config._QaConfig.Load(filename)
+    except qa_error.Error, err:
+      self.assertTrue(str(err).startswith(expected))
+    else:
+      self.fail("Exception was not raised")
+
+  def testFailsValidation(self):
+    filename = utils.PathJoin(self.tmpdir, "qa.json")
+    testconfig = {}
+
+    check_fn = compat.partial(self._CheckLoadError, filename, testconfig)
+
+    # No nodes
+    check_fn("Need at least one node")
+
+    testconfig["nodes"] = [
+      {
+        "primary": "xen-test-0",
+        "secondary": "192.0.2.1",
+        },
+      ]
+
+    # No instances
+    check_fn("Need at least one instance")
+
+    testconfig["instances"] = [
+      {
+        "name": "xen-test-inst1",
+        },
+      ]
+
+    # Missing "disk" and "disk-growth"
+    check_fn("Config options 'disk' and 'disk-growth' ")
+
+    testconfig["disk"] = []
+    testconfig["disk-growth"] = testconfig["disk"]
+
+    # Minimal accepted configuration
+    self._WriteConfig(filename, testconfig)
+    result = qa_config._QaConfig.Load(filename)
+    self.assertTrue(result.get("nodes"))
+
+    # Non-existent instance check script
+    testconfig[qa_config._INSTANCE_CHECK_KEY] = \
+      utils.PathJoin(self.tmpdir, "instcheck")
+    check_fn("Can't find instance check script")
+    del testconfig[qa_config._INSTANCE_CHECK_KEY]
+
+    # No enabled hypervisor
+    testconfig[qa_config._ENABLED_HV_KEY] = None
+    check_fn("No hypervisor is enabled")
+
+    # Unknown hypervisor
+    testconfig[qa_config._ENABLED_HV_KEY] = ["#unknownhv#"]
+    check_fn("Unknown hypervisor(s) enabled:")
+
+
+class TestQaConfigWithSampleConfig(unittest.TestCase):
+  """Tests using C{qa-sample.json}.
+
+  This test case serves two purposes:
+
+    - Ensure shipped C{qa-sample.json} file is considered a valid QA
+      configuration
+    - Test some functions of L{qa_config._QaConfig} without having to
+      mock a whole configuration file
+
+  """
+  def setUp(self):
+    filename = "%s/qa/qa-sample.json" % testutils.GetSourceDir()
+
+    self.config = qa_config._QaConfig.Load(filename)
+
+  def testGetEnabledHypervisors(self):
+    self.assertEqual(self.config.GetEnabledHypervisors(),
+                     [constants.DEFAULT_ENABLED_HYPERVISOR])
+
+  def testGetDefaultHypervisor(self):
+    self.assertEqual(self.config.GetDefaultHypervisor(),
+                     constants.DEFAULT_ENABLED_HYPERVISOR)
+
+  def testGetInstanceCheckScript(self):
+    self.assertTrue(self.config.GetInstanceCheckScript() is None)
+
+  def testGetAndGetItem(self):
+    self.assertEqual(self.config["nodes"], self.config.get("nodes"))
+
+  def testGetMasterNode(self):
+    self.assertEqual(self.config.GetMasterNode(), self.config["nodes"][0])
+
+
+class TestQaConfig(unittest.TestCase):
+  def setUp(self):
+    filename = \
+      testutils.TestDataFilename("qa-minimal-nodes-instances-only.json")
+
+    self.config = qa_config._QaConfig.Load(filename)
+
+  def testExclusiveStorage(self):
+    self.assertRaises(AssertionError, self.config.GetExclusiveStorage)
+
+    for value in [False, True, 0, 1, 30804, ""]:
+      self.config.SetExclusiveStorage(value)
+      self.assertEqual(self.config.GetExclusiveStorage(), bool(value))
+
+      for template in constants.DISK_TEMPLATES:
+        if value and template not in constants.DTS_EXCL_STORAGE:
+          self.assertFalse(self.config.IsTemplateSupported(template))
+        else:
+          self.assertTrue(self.config.IsTemplateSupported(template))
+
+  def testInstanceConversion(self):
+    self.assertTrue(isinstance(self.config["instances"][0],
+                               qa_config._QaInstance))
+
+  def testNodeConversion(self):
+    self.assertTrue(isinstance(self.config["nodes"][0],
+                               qa_config._QaNode))
+
+  def testAcquireAndReleaseInstance(self):
+    self.assertFalse(compat.any(map(operator.attrgetter("used"),
+                                    self.config["instances"])))
+
+    inst = qa_config.AcquireInstance(_cfg=self.config)
+    self.assertTrue(inst.used)
+    self.assertTrue(inst.disk_template is None)
+
+    inst.Release()
+
+    self.assertFalse(inst.used)
+    self.assertTrue(inst.disk_template is None)
+
+    self.assertFalse(compat.any(map(operator.attrgetter("used"),
+                                    self.config["instances"])))
+
+  def testAcquireInstanceTooMany(self):
+    # Acquire all instances
+    for _ in range(len(self.config["instances"])):
+      inst = qa_config.AcquireInstance(_cfg=self.config)
+      self.assertTrue(inst.used)
+      self.assertTrue(inst.disk_template is None)
+
+    # The next acquisition must fail
+    self.assertRaises(qa_error.OutOfInstancesError,
+                      qa_config.AcquireInstance, _cfg=self.config)
+
+  def testAcquireNodeNoneAdded(self):
+    self.assertFalse(compat.any(map(operator.attrgetter("added"),
+                                    self.config["nodes"])))
+
+    # First call must return master node
+    node = qa_config.AcquireNode(_cfg=self.config)
+    self.assertEqual(node, self.config.GetMasterNode())
+
+    # Next call with exclusion list fails
+    self.assertRaises(qa_error.OutOfNodesError, qa_config.AcquireNode,
+                      exclude=[node], _cfg=self.config)
+
+  def testAcquireNodeTooMany(self):
+    # Mark all nodes as added (master excluded)
+    for node in self.config["nodes"]:
+      if node != self.config.GetMasterNode():
+        node.MarkAdded()
+
+    nodecount = len(self.config["nodes"])
+
+    self.assertTrue(nodecount > 1)
+
+    acquired = []
+
+    for _ in range(nodecount):
+      node = qa_config.AcquireNode(exclude=acquired, _cfg=self.config)
+      if node == self.config.GetMasterNode():
+        self.assertFalse(node.added)
+      else:
+        self.assertTrue(node.added)
+      self.assertEqual(node.use_count, 1)
+      acquired.append(node)
+
+    self.assertRaises(qa_error.OutOfNodesError, qa_config.AcquireNode,
+                      exclude=acquired, _cfg=self.config)
+
+
 if __name__ == "__main__":
   testutils.GanetiTestProgram()