Fix a bug in the Runtime tests

[ganeti-local] / qa / qa_utils.py
diff --git a/qa/qa_utils.py b/qa/qa_utils.py
index a4af630..774ccc9 100644
--- a/qa/qa_utils.py
+++ b/qa/qa_utils.py
@@ -1,7 +1,7 @@
  #
  #
  
-# Copyright (C) 2007 Google Inc.
+# Copyright (C) 2007, 2011, 2012, 2013 Google Inc.
  #
  # This program is free software; you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
@@ -27,8 +27,19 @@ import os
  import re
  import sys
  import subprocess
+import random
+import tempfile
+
+try:
+  import functools
+except ImportError, err:
+  raise ImportError("Python 2.5 or higher is required: %s" % err)
  
  from ganeti import utils
+from ganeti import compat
+from ganeti import constants
+from ganeti import ht
+from ganeti import pathutils
  
  import qa_config
  import qa_error
@@ -39,11 +50,28 @@ _WARNING_SEQ = None
  _ERROR_SEQ = None
  _RESET_SEQ = None
  
+_MULTIPLEXERS = {}
+
+#: Unique ID per QA run
+_RUN_UUID = utils.NewUUID()
+
+#: Path to the QA query output log file
+_QA_OUTPUT = pathutils.GetLogFilename("qa-output")
+
+
+(INST_DOWN,
+ INST_UP) = range(500, 502)
+
+(FIRST_ARG,
+ RETURN_VALUE) = range(1000, 1002)
+
  
  def _SetupColours():
    """Initializes the colour constants.
  
    """
+  # pylint: disable=W0603
+  # due to global usage
    global _INFO_SEQ, _WARNING_SEQ, _ERROR_SEQ, _RESET_SEQ
  
    # Don't use colours if stdout isn't a terminal
@@ -74,23 +102,23 @@ def AssertIn(item, sequence):
  
    """
    if item not in sequence:
-    raise qa_error.Error('%r not in %r' % (item, sequence))
+    raise qa_error.Error("%r not in %r" % (item, sequence))
  
  
-def AssertEqual(first, second):
-  """Raises an error when values aren't equal.
+def AssertNotIn(item, sequence):
+  """Raises an error when item is in sequence.
  
    """
-  if not first == second:
-    raise qa_error.Error('%r == %r' % (first, second))
+  if item in sequence:
+    raise qa_error.Error("%r in %r" % (item, sequence))
  
  
-def AssertNotEqual(first, second):
-  """Raises an error when values are equal.
+def AssertEqual(first, second):
+  """Raises an error when values aren't equal.
  
    """
-  if not first != second:
-    raise qa_error.Error('%r != %r' % (first, second))
+  if not first == second:
+    raise qa_error.Error("%r == %r" % (first, second))
  
  
  def AssertMatch(string, pattern):
@@ -101,47 +129,203 @@ def AssertMatch(string, pattern):
      raise qa_error.Error("%r doesn't match /%r/" % (string, pattern))
  
  
-def GetSSHCommand(node, cmd, strict=True):
+def _GetName(entity, key):
+  """Tries to get name of an entity.
+
+  @type entity: string or dict
+  @type key: string
+  @param key: Dictionary key containing name
+
+  """
+  if isinstance(entity, basestring):
+    result = entity
+  elif isinstance(entity, dict):
+    result = entity[key]
+  else:
+    raise qa_error.Error("Expected string or dictionary, got %s: %s" %
+                         (type(entity), entity))
+
+  if not ht.TNonEmptyString(result):
+    raise Exception("Invalid name '%s'" % result)
+
+  return result
+
+
+def _AssertRetCode(rcode, fail, cmdstr, nodename):
+  """Check the return value from a command and possibly raise an exception.
+
+  """
+  if fail and rcode == 0:
+    raise qa_error.Error("Command '%s' on node %s was expected to fail but"
+                         " didn't" % (cmdstr, nodename))
+  elif not fail and rcode != 0:
+    raise qa_error.Error("Command '%s' on node %s failed, exit code %s" %
+                         (cmdstr, nodename, rcode))
+
+
+def AssertCommand(cmd, fail=False, node=None, log_cmd=True):
+  """Checks that a remote command succeeds.
+
+  @param cmd: either a string (the command to execute) or a list (to
+      be converted using L{utils.ShellQuoteArgs} into a string)
+  @type fail: boolean
+  @param fail: if the command is expected to fail instead of succeeding
+  @param node: if passed, it should be the node on which the command
+      should be executed, instead of the master node (can be either a
+      dict or a string)
+  @param log_cmd: if False, the command won't be logged (simply passed to
+      StartSSH)
+  @return: the return code of the command
+  @raise qa_error.Error: if the command fails when it shouldn't or vice versa
+
+  """
+  if node is None:
+    node = qa_config.GetMasterNode()
+
+  nodename = _GetName(node, "primary")
+
+  if isinstance(cmd, basestring):
+    cmdstr = cmd
+  else:
+    cmdstr = utils.ShellQuoteArgs(cmd)
+
+  rcode = StartSSH(nodename, cmdstr, log_cmd=log_cmd).wait()
+  _AssertRetCode(rcode, fail, cmdstr, nodename)
+
+  return rcode
+
+
+def AssertRedirectedCommand(cmd, fail=False, node=None, log_cmd=True):
+  """Executes a command with redirected output.
+
+  The log will go to the qa-output log file in the ganeti log
+  directory on the node where the command is executed. The fail and
+  node parameters are passed unchanged to AssertCommand.
+
+  @param cmd: the command to be executed, as a list; a string is not
+      supported
+
+  """
+  if not isinstance(cmd, list):
+    raise qa_error.Error("Non-list passed to AssertRedirectedCommand")
+  ofile = utils.ShellQuote(_QA_OUTPUT)
+  cmdstr = utils.ShellQuoteArgs(cmd)
+  AssertCommand("echo ---- $(date) %s ---- >> %s" % (cmdstr, ofile),
+                fail=False, node=node, log_cmd=False)
+  return AssertCommand(cmdstr + " >> %s" % ofile,
+                       fail=fail, node=node, log_cmd=log_cmd)
+
+
+def GetSSHCommand(node, cmd, strict=True, opts=None, tty=None):
    """Builds SSH command to be executed.
  
-  Args:
-  - node: Node the command should run on
-  - cmd: Command to be executed as a list with all parameters
-  - strict: Whether to enable strict host key checking
+  @type node: string
+  @param node: node the command should run on
+  @type cmd: string
+  @param cmd: command to be executed in the node; if None or empty
+      string, no command will be executed
+  @type strict: boolean
+  @param strict: whether to enable strict host key checking
+  @type opts: list
+  @param opts: list of additional options
+  @type tty: boolean or None
+  @param tty: if we should use tty; if None, will be auto-detected
  
    """
-  args = [ 'ssh', '-oEscapeChar=none', '-oBatchMode=yes', '-l', 'root', '-t' ]
+  args = ["ssh", "-oEscapeChar=none", "-oBatchMode=yes", "-lroot"]
+
+  if tty is None:
+    tty = sys.stdout.isatty()
+
+  if tty:
+    args.append("-t")
  
    if strict:
-    tmp = 'yes'
+    tmp = "yes"
    else:
-    tmp = 'no'
-  args.append('-oStrictHostKeyChecking=%s' % tmp)
-  args.append('-oClearAllForwardings=yes')
-  args.append('-oForwardAgent=yes')
+    tmp = "no"
+  args.append("-oStrictHostKeyChecking=%s" % tmp)
+  args.append("-oClearAllForwardings=yes")
+  args.append("-oForwardAgent=yes")
+  if opts:
+    args.extend(opts)
+  if node in _MULTIPLEXERS:
+    spath = _MULTIPLEXERS[node][0]
+    args.append("-oControlPath=%s" % spath)
+    args.append("-oControlMaster=no")
    args.append(node)
-  args.append(cmd)
-
-  print 'SSH:', utils.ShellQuoteArgs(args)
+  if cmd:
+    args.append(cmd)
  
    return args
  
  
-def StartSSH(node, cmd, strict=True):
+def StartLocalCommand(cmd, _nolog_opts=False, log_cmd=True, **kwargs):
+  """Starts a local command.
+
+  """
+  if log_cmd:
+    if _nolog_opts:
+      pcmd = [i for i in cmd if not i.startswith("-")]
+    else:
+      pcmd = cmd
+    print "Command: %s" % utils.ShellQuoteArgs(pcmd)
+  return subprocess.Popen(cmd, shell=False, **kwargs)
+
+
+def StartSSH(node, cmd, strict=True, log_cmd=True):
    """Starts SSH.
  
    """
-  return subprocess.Popen(GetSSHCommand(node, cmd, strict=strict),
-                          shell=False)
+  return StartLocalCommand(GetSSHCommand(node, cmd, strict=strict),
+                           _nolog_opts=True, log_cmd=log_cmd)
+
+
+def StartMultiplexer(node):
+  """Starts a multiplexer command.
+
+  @param node: the node for which to open the multiplexer
+
+  """
+  if node in _MULTIPLEXERS:
+    return
+
+  # Note: yes, we only need mktemp, since we'll remove the file anyway
+  sname = tempfile.mktemp(prefix="ganeti-qa-multiplexer.")
+  utils.RemoveFile(sname)
+  opts = ["-N", "-oControlPath=%s" % sname, "-oControlMaster=yes"]
+  print "Created socket at %s" % sname
+  child = StartLocalCommand(GetSSHCommand(node, None, opts=opts))
+  _MULTIPLEXERS[node] = (sname, child)
  
  
-def GetCommandOutput(node, cmd):
+def CloseMultiplexers():
+  """Closes all current multiplexers and cleans up.
+
+  """
+  for node in _MULTIPLEXERS.keys():
+    (sname, child) = _MULTIPLEXERS.pop(node)
+    utils.KillProcess(child.pid, timeout=10, waitpid=True)
+    utils.RemoveFile(sname)
+
+
+def GetCommandOutput(node, cmd, tty=None, fail=False):
    """Returns the output of a command executed on the given node.
  
+  @type node: string
+  @param node: node the command should run on
+  @type cmd: string
+  @param cmd: command to be executed in the node (cannot be empty or None)
+  @type tty: bool or None
+  @param tty: if we should use tty; if None, it will be auto-detected
+  @type fail: bool
+  @param fail: whether the command is expected to fail
    """
-  p = subprocess.Popen(GetSSHCommand(node, cmd),
-                       shell=False, stdout=subprocess.PIPE)
-  AssertEqual(p.wait(), 0)
+  assert cmd
+  p = StartLocalCommand(GetSSHCommand(node, cmd, tty=tty),
+                        stdout=subprocess.PIPE)
+  rcode = p.wait()
+  _AssertRetCode(rcode, fail, cmd, node)
    return p.stdout.read()
  
  
@@ -150,6 +334,7 @@ def UploadFile(node, src):
  
    Caller needs to remove the returned file on the node when it's not needed
    anymore.
+
    """
    # Make sure nobody else has access to it while preserving local permissions
    mode = os.stat(src).st_mode & 0700
@@ -159,7 +344,7 @@ def UploadFile(node, src):
           'cat > "${tmp}" && '
           'echo "${tmp}"') % mode
  
-  f = open(src, 'r')
+  f = open(src, "r")
    try:
      p = subprocess.Popen(GetSSHCommand(node, cmd), shell=False, stdin=f,
                           stdout=subprocess.PIPE)
@@ -171,15 +356,57 @@ def UploadFile(node, src):
      f.close()
  
  
+def UploadData(node, data, mode=0600, filename=None):
+  """Uploads data to a node and returns the filename.
+
+  Caller needs to remove the returned file on the node when it's not needed
+  anymore.
+
+  """
+  if filename:
+    tmp = "tmp=%s" % utils.ShellQuote(filename)
+  else:
+    tmp = "tmp=$(tempfile --mode %o --prefix gnt)" % mode
+  cmd = ("%s && "
+         "[[ -f \"${tmp}\" ]] && "
+         "cat > \"${tmp}\" && "
+         "echo \"${tmp}\"") % tmp
+
+  p = subprocess.Popen(GetSSHCommand(node, cmd), shell=False,
+                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+  p.stdin.write(data)
+  p.stdin.close()
+  AssertEqual(p.wait(), 0)
+
+  # Return temporary filename
+  return p.stdout.read().strip()
+
+
+def BackupFile(node, path):
+  """Creates a backup of a file on the node and returns the filename.
+
+  Caller needs to remove the returned file on the node when it's not needed
+  anymore.
+
+  """
+  cmd = ("tmp=$(tempfile --prefix .gnt --directory=$(dirname %s)) && "
+         "[[ -f \"$tmp\" ]] && "
+         "cp %s $tmp && "
+         "echo $tmp") % (utils.ShellQuote(path), utils.ShellQuote(path))
+
+  # Return temporary filename
+  return GetCommandOutput(node, cmd).strip()
+
+
  def _ResolveName(cmd, key):
    """Helper function.
  
    """
    master = qa_config.GetMasterNode()
  
-  output = GetCommandOutput(master['primary'], utils.ShellQuoteArgs(cmd))
+  output = GetCommandOutput(master["primary"], utils.ShellQuoteArgs(cmd))
    for line in output.splitlines():
-    (lkey, lvalue) = line.split(':', 1)
+    (lkey, lvalue) = line.split(":", 1)
      if lkey == key:
        return lvalue.lstrip()
    raise KeyError("Key not found")
@@ -188,17 +415,20 @@ def _ResolveName(cmd, key):
  def ResolveInstanceName(instance):
    """Gets the full name of an instance.
  
+  @type instance: string
+  @param instance: Instance name
+
    """
-  return _ResolveName(['gnt-instance', 'info', instance['name']],
-                      'Instance name')
+  return _ResolveName(["gnt-instance", "info", instance],
+                      "Instance name")
  
  
  def ResolveNodeName(node):
    """Gets the full name of a node.
  
    """
-  return _ResolveName(['gnt-node', 'info', node['primary']],
-                      'Node name')
+  return _ResolveName(["gnt-node", "info", node["primary"]],
+                      "Node name")
  
  
  def GetNodeInstances(node, secondaries=False):
@@ -209,20 +439,119 @@ def GetNodeInstances(node, secondaries=False):
    node_name = ResolveNodeName(node)
  
    # Get list of all instances
-  cmd = ['gnt-instance', 'list', '--separator=:', '--no-headers',
-         '--output=name,pnode,snodes']
-  output = GetCommandOutput(master['primary'], utils.ShellQuoteArgs(cmd))
+  cmd = ["gnt-instance", "list", "--separator=:", "--no-headers",
+         "--output=name,pnode,snodes"]
+  output = GetCommandOutput(master["primary"], utils.ShellQuoteArgs(cmd))
  
    instances = []
    for line in output.splitlines():
-    (name, pnode, snodes) = line.split(':', 2)
+    (name, pnode, snodes) = line.split(":", 2)
      if ((not secondaries and pnode == node_name) or
-        (secondaries and node_name in snodes.split(','))):
+        (secondaries and node_name in snodes.split(","))):
        instances.append(name)
  
    return instances
  
  
+def _SelectQueryFields(rnd, fields):
+  """Generates a list of fields for query tests.
+
+  """
+  # Create copy for shuffling
+  fields = list(fields)
+  rnd.shuffle(fields)
+
+  # Check all fields
+  yield fields
+  yield sorted(fields)
+
+  # Duplicate fields
+  yield fields + fields
+
+  # Check small groups of fields
+  while fields:
+    yield [fields.pop() for _ in range(rnd.randint(2, 10)) if fields]
+
+
+def _List(listcmd, fields, names):
+  """Runs a list command.
+
+  """
+  master = qa_config.GetMasterNode()
+
+  cmd = [listcmd, "list", "--separator=|", "--no-headers",
+         "--output", ",".join(fields)]
+
+  if names:
+    cmd.extend(names)
+
+  return GetCommandOutput(master["primary"],
+                          utils.ShellQuoteArgs(cmd)).splitlines()
+
+
+def GenericQueryTest(cmd, fields, namefield="name", test_unknown=True):
+  """Runs a number of tests on query commands.
+
+  @param cmd: Command name
+  @param fields: List of field names
+
+  """
+  rnd = random.Random(hash(cmd))
+
+  fields = list(fields)
+  rnd.shuffle(fields)
+
+  # Test a number of field combinations
+  for testfields in _SelectQueryFields(rnd, fields):
+    AssertRedirectedCommand([cmd, "list", "--output", ",".join(testfields)])
+
+  if namefield is not None:
+    namelist_fn = compat.partial(_List, cmd, [namefield])
+
+    # When no names were requested, the list must be sorted
+    names = namelist_fn(None)
+    AssertEqual(names, utils.NiceSort(names))
+
+    # When requesting specific names, the order must be kept
+    revnames = list(reversed(names))
+    AssertEqual(namelist_fn(revnames), revnames)
+
+    randnames = list(names)
+    rnd.shuffle(randnames)
+    AssertEqual(namelist_fn(randnames), randnames)
+
+  if test_unknown:
+    # Listing unknown items must fail
+    AssertCommand([cmd, "list", "this.name.certainly.does.not.exist"],
+                  fail=True)
+
+  # Check exit code for listing unknown field
+  AssertEqual(AssertRedirectedCommand([cmd, "list",
+                                       "--output=field/does/not/exist"],
+                                      fail=True),
+              constants.EXIT_UNKNOWN_FIELD)
+
+
+def GenericQueryFieldsTest(cmd, fields):
+  master = qa_config.GetMasterNode()
+
+  # Listing fields
+  AssertRedirectedCommand([cmd, "list-fields"])
+  AssertRedirectedCommand([cmd, "list-fields"] + fields)
+
+  # Check listed fields (all, must be sorted)
+  realcmd = [cmd, "list-fields", "--separator=|", "--no-headers"]
+  output = GetCommandOutput(master["primary"],
+                            utils.ShellQuoteArgs(realcmd)).splitlines()
+  AssertEqual([line.split("|", 1)[0] for line in output],
+              utils.NiceSort(fields))
+
+  # Check exit code for listing unknown field
+  AssertEqual(AssertCommand([cmd, "list-fields", "field/does/not/exist"],
+                            fail=True),
+              constants.EXIT_UNKNOWN_FIELD)
+
+
  def _FormatWithColor(text, seq):
    if not seq:
      return text
@@ -232,3 +561,151 @@ def _FormatWithColor(text, seq):
  FormatWarning = lambda text: _FormatWithColor(text, _WARNING_SEQ)
  FormatError = lambda text: _FormatWithColor(text, _ERROR_SEQ)
  FormatInfo = lambda text: _FormatWithColor(text, _INFO_SEQ)
+
+
+def AddToEtcHosts(hostnames):
+  """Adds hostnames to /etc/hosts.
+
+  @param hostnames: List of hostnames; first is the A record, all others CNAMEs
+
+  """
+  master = qa_config.GetMasterNode()
+  tmp_hosts = UploadData(master["primary"], "", mode=0644)
+
+  data = []
+  for localhost in ("::1", "127.0.0.1"):
+    data.append("%s %s" % (localhost, " ".join(hostnames)))
+
+  try:
+    AssertCommand("{ cat %s && echo -e '%s'; } > %s && mv %s %s" %
+                  (utils.ShellQuote(pathutils.ETC_HOSTS),
+                   "\\n".join(data),
+                   utils.ShellQuote(tmp_hosts),
+                   utils.ShellQuote(tmp_hosts),
+                   utils.ShellQuote(pathutils.ETC_HOSTS)))
+  except Exception:
+    AssertCommand(["rm", "-f", tmp_hosts])
+    raise
+
+
+def RemoveFromEtcHosts(hostnames):
+  """Remove hostnames from /etc/hosts.
+
+  @param hostnames: List of hostnames; first is the A record, all others CNAMEs
+
+  """
+  master = qa_config.GetMasterNode()
+  tmp_hosts = UploadData(master["primary"], "", mode=0644)
+  quoted_tmp_hosts = utils.ShellQuote(tmp_hosts)
+
+  sed_data = " ".join(hostnames)
+  try:
+    AssertCommand(("sed -e '/^\(::1\|127\.0\.0\.1\)\s\+%s/d' %s > %s"
+                   " && mv %s %s") %
+                   (sed_data, utils.ShellQuote(pathutils.ETC_HOSTS),
+                    quoted_tmp_hosts, quoted_tmp_hosts,
+                    utils.ShellQuote(pathutils.ETC_HOSTS)))
+  except Exception:
+    AssertCommand(["rm", "-f", tmp_hosts])
+    raise
+
+
+def RunInstanceCheck(instance, running):
+  """Check if instance is running or not.
+
+  """
+  instance_name = _GetName(instance, "name")
+
+  script = qa_config.GetInstanceCheckScript()
+  if not script:
+    return
+
+  master_node = qa_config.GetMasterNode()
+
+  # Build command to connect to master node
+  master_ssh = GetSSHCommand(master_node["primary"], "--")
+
+  if running:
+    running_shellval = "1"
+    running_text = ""
+  else:
+    running_shellval = ""
+    running_text = "not "
+
+  print FormatInfo("Checking if instance '%s' is %srunning" %
+                   (instance_name, running_text))
+
+  args = [script, instance_name]
+  env = {
+    "PATH": constants.HOOKS_PATH,
+    "RUN_UUID": _RUN_UUID,
+    "MASTER_SSH": utils.ShellQuoteArgs(master_ssh),
+    "INSTANCE_NAME": instance_name,
+    "INSTANCE_RUNNING": running_shellval,
+    }
+
+  result = os.spawnve(os.P_WAIT, script, args, env)
+  if result != 0:
+    raise qa_error.Error("Instance check failed with result %s" % result)
+
+
+def _InstanceCheckInner(expected, instarg, args, result):
+  """Helper function used by L{InstanceCheck}.
+
+  """
+  if instarg == FIRST_ARG:
+    instance = args[0]
+  elif instarg == RETURN_VALUE:
+    instance = result
+  else:
+    raise Exception("Invalid value '%s' for instance argument" % instarg)
+
+  if expected in (INST_DOWN, INST_UP):
+    RunInstanceCheck(instance, (expected == INST_UP))
+  elif expected is not None:
+    raise Exception("Invalid value '%s'" % expected)
+
+
+def InstanceCheck(before, after, instarg):
+  """Decorator to check instance status before and after test.
+
+  @param before: L{INST_DOWN} if instance must be stopped before test,
+    L{INST_UP} if instance must be running before test, L{None} to not check.
+  @param after: L{INST_DOWN} if instance must be stopped after test,
+    L{INST_UP} if instance must be running after test, L{None} to not check.
+  @param instarg: L{FIRST_ARG} to use first argument to test as instance (a
+    dictionary), L{RETURN_VALUE} to use return value (disallows pre-checks)
+
+  """
+  def decorator(fn):
+    @functools.wraps(fn)
+    def wrapper(*args, **kwargs):
+      _InstanceCheckInner(before, instarg, args, NotImplemented)
+
+      result = fn(*args, **kwargs)
+
+      _InstanceCheckInner(after, instarg, args, result)
+
+      return result
+    return wrapper
+  return decorator
+
+
+def GetNonexistentGroups(count):
+  """Gets group names which shouldn't exist on the cluster.
+
+  @param count: Number of groups to get
+  @rtype: list
+
+  """
+  groups = qa_config.get("groups", {})
+
+  default = ["group1", "group2", "group3"]
+  assert count <= len(default)
+
+  candidates = groups.get("inexistent-groups", default)[:count]
+
+  if len(candidates) < count:
+    raise Exception("At least %s non-existent groups are needed" % count)
+
+  return candidates