Statistics
| Branch: | Tag: | Revision:

root / qa / qa_cluster.py @ 3601d488

History | View | Annotate | Download (17.2 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Cluster related QA tests.
23

24
"""
25

    
26
import tempfile
27
import os.path
28

    
29
from ganeti import constants
30
from ganeti import compat
31
from ganeti import utils
32
from ganeti import pathutils
33

    
34
import qa_config
35
import qa_utils
36
import qa_error
37

    
38
from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
39

    
40

    
41
#: cluster verify command
42
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
43

    
44

    
45
def _RemoveFileFromAllNodes(filename):
  """Deletes the given file on every configured node.

  Runs C{rm -f} once for each entry of the "nodes" list in the QA
  configuration.

  @param filename: path of the file to remove

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
51

    
52

    
53
def _CheckFileOnAllNodes(filename, content):
  """Asserts that a file has the expected content on every node.

  The file is read with C{cat} via each node's C{primary} entry and the
  command output is compared against C{content}.

  @param filename: path of the file to check
  @param content: expected output of C{cat} for that file

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    output = qa_utils.GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(output, content)
60

    
61

    
62
# data for testing failures due to bad keys/values for disk parameters
63
_FAIL_PARAMS = ["nonexistent:resync-rate=1",
64
                "drbd:nonexistent=1",
65
                "drbd:resync-rate=invalid",
66
                ]
67

    
68

    
69
def TestClusterInitDisk():
  """gnt-cluster init -D

  Expects cluster initialization to fail for each of the invalid disk
  parameter specifications in L{_FAIL_PARAMS}.

  """
  cluster_name = qa_config.get("name")
  for bad_param in _FAIL_PARAMS:
    init_cmd = ["gnt-cluster", "init", "-D", bad_param, cluster_name]
    AssertCommand(init_cmd, fail=True)
74

    
75

    
76
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs a RAPI users file containing the given credentials, initializes
  the cluster with options taken from the QA configuration and afterwards
  applies the configured hypervisor, backend, OS and per-OS hypervisor
  parameters.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: secret written to the RAPI users file

  """
  master = qa_config.GetMasterNode()

  users_file = pathutils.RAPI_USERS_FILE
  users_dir = os.path.dirname(users_file)

  # The RAPI credentials are put in place before the cluster is initialized
  credentials = tempfile.NamedTemporaryFile()
  try:
    credentials.write("%s %s write\n" % (rapi_user, rapi_secret))
    credentials.flush()

    uploaded = qa_utils.UploadFile(master["primary"], credentials.name)
    try:
      AssertCommand(["mkdir", "-p", users_dir])
      AssertCommand(["mv", uploaded, users_file])
    finally:
      # Removes the uploaded copy in case it was not moved away
      AssertCommand(["rm", "-f", uploaded])
  finally:
    credentials.close()

  # Build the "gnt-cluster init" command line
  init_cmd = ["gnt-cluster", "init"]
  init_cmd.append("--primary-ip-version=%d" %
                  qa_config.get("primary_ip_version", 4))
  init_cmd.append("--enabled-hypervisors=%s" %
                  ",".join(qa_config.GetEnabledHypervisors()))

  # Instance specs are only passed for values set in the configuration
  spec_types = ("mem-size", "disk-size", "disk-count", "cpu-count",
                "nic-count")
  for spec_type in spec_types:
    config_prefix = "ispec_%s_" % spec_type.replace("-", "_")
    for spec_val in ("min", "max", "std"):
      spec = qa_config.get(config_prefix + spec_val, None)
      if not spec:
        continue
      init_cmd.append("--specs-%s=%s=%d" % (spec_type, spec_val, spec))

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_cmd.append("--secondary-ip=%s" % secondary_ip)

  master_netdev = qa_config.get("master-netdev", None)
  if master_netdev:
    init_cmd.append("--master-netdev=%s" % master_netdev)

  nicparams = qa_config.get("default-nicparams", None)
  if nicparams:
    formatted = ",".join(utils.FormatKeyValue(nicparams))
    init_cmd.append("--nic-parameters=%s" % formatted)

  init_cmd.append(qa_config.get("name"))
  AssertCommand(init_cmd)

  # Hypervisor and backend parameters go into one "modify" call
  modify_cmd = ["gnt-cluster", "modify"]

  hv_settings = qa_config.get("hypervisor-parameters", {})
  for hv_name, hv_params in hv_settings.items():
    modify_cmd.extend(["-H", "%s:%s" % (hv_name, hv_params)])

  be_params = qa_config.get("backend-parameters", "")
  if be_params:
    modify_cmd.extend(["-B", be_params])

  # Nothing to do if no option was added to the bare command
  if len(modify_cmd) > 2:
    AssertCommand(modify_cmd)

  # OS parameters
  os_settings = qa_config.get("os-parameters", {})
  for os_name, os_params in os_settings.items():
    AssertCommand(["gnt-os", "modify", "-O", os_params, os_name])

  # OS hypervisor parameters
  os_hv_settings = qa_config.get("os-hvp", {})
  for os_name in os_hv_settings:
    for hv_name, hv_params in os_hv_settings[os_name].items():
      AssertCommand(["gnt-os", "modify",
                     "-H", "%s:%s" % (hv_name, hv_params),
                     os_name])
151

    
152

    
153
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the name configured under the "rename" key and
  back to its original name, running cluster verification after each
  rename. If no "rename" entry is configured, a message is printed and
  nothing else is done.

  """
  rename_cmd = ["gnt-cluster", "rename", "-f"]

  original_name = qa_config.get("name")
  rename_target = qa_config.get("rename", None)
  if rename_target is None:
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  # Rename forth and back, verifying the cluster after each step
  AssertCommand(rename_cmd + [rename_target])
  AssertCommand(_CLUSTER_VERIFY)
  AssertCommand(rename_cmd + [original_name])
  AssertCommand(_CLUSTER_VERIFY)
170

    
171

    
172
def TestClusterOob():
  """out-of-band framework

  Sets the C{oob_program} node parameter to a non-existing path, then to
  an existing file with mode 0400 and finally to the same file with mode
  0500. Cluster verification is expected to fail in the first two cases
  and to succeed in the last one. The parameter is reset at the end.

  """
  def _SetOobProgram(path):
    """Sets the cluster-wide C{oob_program} node parameter."""
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=%s" % path])

  existing_oob = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  AssertCommand(_CLUSTER_VERIFY)

  # Program which does not exist at all
  _SetOobProgram("/tmp/ganeti-qa-oob-does-not-exist-%s" % utils.NewUUID())
  AssertCommand(_CLUSTER_VERIFY, fail=True)

  # Create the file with mode 0400 and distribute it
  AssertCommand(["touch", existing_oob])
  AssertCommand(["chmod", "0400", existing_oob])
  AssertCommand(["gnt-cluster", "copyfile", existing_oob])

  try:
    _SetOobProgram(existing_oob)
    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # With mode 0500 verification is expected to pass
    AssertCommand(["chmod", "0500", existing_oob])
    AssertCommand(["gnt-cluster", "copyfile", existing_oob])
    AssertCommand(_CLUSTER_VERIFY)
  finally:
    AssertCommand(["gnt-cluster", "command", "rm", existing_oob])

  # Reset the parameter to an empty value
  _SetOobProgram("")
202

    
203

    
204
def TestClusterEpo():
  """gnt-cluster epo

  Checks the argument validation of the C{epo} command, runs it for the
  whole cluster and then again with C{--on}, checking the instance status
  reported after each run.

  """
  master = qa_config.GetMasterNode()
  master_primary = master["primary"]

  def _EveryLineIs(output, expected):
    """Tells whether all lines of C{output} are equal to C{expected}."""
    return compat.all(line == expected for line in output.splitlines())

  def _InstanceStatus():
    """Returns the status column for all instances."""
    return GetCommandOutput(master_primary,
                            "gnt-instance list --no-headers -o status")

  # Assert that OOB is unavailable for all nodes
  powered_output = GetCommandOutput(
    master_primary, "gnt-node list --verbose --no-headers -o powered")
  AssertEqual(_EveryLineIs(powered_output, "(unavail)"), True)

  epo = ["gnt-cluster", "epo"]

  # Conflicting
  AssertCommand(epo + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo + ["-f", master_primary], fail=True)

  # This shouldn't fail
  AssertCommand(epo + ["-f", "--all"])

  # All instances should have been stopped now; ERROR_down because the
  # instance is stopped but not recorded as such
  AssertEqual(_EveryLineIs(_InstanceStatus(), "ERROR_down"), True)

  # Now start everything again
  AssertCommand(epo + ["--on", "-f", "--all"])

  # All instances should have been started now
  AssertEqual(_EveryLineIs(_InstanceStatus(), "running"), True)
241

    
242

    
243
def TestClusterVerify():
  """gnt-cluster verify

  Runs the cluster verification followed by the disk verification and
  expects both to succeed.

  """
  for verify_cmd in (_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]):
    AssertCommand(verify_cmd)
247

    
248

    
249
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command and expects it to succeed.

  """
  jobqueue_cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(jobqueue_cmd)
252

    
253

    
254
def TestDelay(node):
  """gnt-debug delay

  Runs a one second delay three times: with default options, with
  C{--no-master} and with C{--no-master} plus the given node.

  @param node: node entry whose C{primary} value is passed to C{-n}

  """
  delay = ["gnt-debug", "delay"]
  duration = "1"

  AssertCommand(delay + [duration])
  AssertCommand(delay + ["--no-master", duration])
  AssertCommand(delay + ["--no-master", "-n", node["primary"], duration])
260

    
261

    
262
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume C{xenvg/qa-test} and checks that cluster
  verification fails while the reserved LVs setting is empty and passes
  once the setting matches the volume. The volume is removed at the end.

  """
  def _Reserve(patterns):
    """Builds a step which sets the reserved LVs and must succeed."""
    return (False, ["gnt-cluster", "modify", "--reserved-lvs", patterns])

  verify_passes = (False, _CLUSTER_VERIFY)
  verify_fails = (True, _CLUSTER_VERIFY)

  steps = [
    verify_passes,
    _Reserve(""),
    (False, ["lvcreate", "-L1G", "-nqa-test", "xenvg"]),
    # The new volume is not reserved yet
    verify_fails,
    _Reserve("xenvg/qa-test,.*/other-test"),
    verify_passes,
    _Reserve(".*/qa-.*"),
    verify_passes,
    _Reserve(""),
    verify_fails,
    (False, ["lvremove", "-f", "xenvg/qa-test"]),
    verify_passes,
    ]

  for (must_fail, step_cmd) in steps:
    AssertCommand(step_cmd, fail=must_fail)
280

    
281

    
282
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Expects the command to fail when called without any options.

  """
  bare_modify = ["gnt-cluster", "modify"]
  AssertCommand(bare_modify, fail=True)
285

    
286

    
287
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Expects the command to fail for each of the invalid disk parameter
  specifications in L{_FAIL_PARAMS}.

  """
  for bad_param in _FAIL_PARAMS:
    modify_cmd = ["gnt-cluster", "modify", "-D", bad_param]
    AssertCommand(modify_cmd, fail=True)
291

    
292

    
293
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Changes backend parameters to valid and invalid values. After each
  valid change, and after the rejected memory changes, the output of
  C{gnt-cluster info} is grepped for the expected value. Finally the
  backend parameters from the QA configuration, if any, are applied
  again.

  """
  def _Modify(be_spec):
    """Returns the command setting the given backend parameters."""
    return ["gnt-cluster", "modify", "-B", be_spec]

  def _InfoShows(name, value):
    """Returns the command grepping "gnt-cluster info" for a value."""
    return ["sh", "-c",
            "gnt-cluster info|grep '^ *%s: %s$'" % (name, value)]

  # Each step is a tuple of (expected to fail, command)
  steps = [
    # max/min mem
    (False, _Modify("maxmem=256")),
    (False, _InfoShows("maxmem", "256")),
    (False, _Modify("minmem=256")),
    (False, _InfoShows("minmem", "256")),
    (True, _Modify("maxmem=a")),
    (False, _InfoShows("maxmem", "256")),
    (True, _Modify("minmem=a")),
    (False, _InfoShows("minmem", "256")),
    (False, _Modify("maxmem=128,minmem=128")),
    (False, _InfoShows("maxmem", "128")),
    (False, _InfoShows("minmem", "128")),
    # vcpus
    (False, _Modify("vcpus=4")),
    (False, _InfoShows("vcpus", "4")),
    (True, _Modify("vcpus=a")),
    (False, _Modify("vcpus=1")),
    (False, _InfoShows("vcpus", "1")),
    # auto_balance
    (False, _Modify("auto_balance=False")),
    (False, _InfoShows("auto_balance", "False")),
    (True, _Modify("auto_balance=1")),
    (False, _Modify("auto_balance=True")),
    (False, _InfoShows("auto_balance", "True")),
    ]

  for (must_fail, step_cmd) in steps:
    AssertCommand(step_cmd, fail=must_fail)

  # redo the original-requested BE parameters, if any
  configured = qa_config.get("backend-parameters", "")
  if configured:
    AssertCommand(_Modify(configured))
327

    
328

    
329
def TestClusterInfo():
  """gnt-cluster info

  Runs the command and expects it to succeed.

  """
  info_cmd = ["gnt-cluster", "info"]
  AssertCommand(info_cmd)
332

    
333

    
334
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command and expects it to succeed.

  """
  redist_cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(redist_cmd)
337

    
338

    
339
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command and expects it to succeed.

  """
  getmaster_cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(getmaster_cmd)
342

    
343

    
344
def TestClusterVersion():
  """gnt-cluster version

  Runs the command and expects it to succeed.

  """
  version_cmd = ["gnt-cluster", "version"]
  AssertCommand(version_cmd)
347

    
348

    
349
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then runs the command with a custom RAPI certificate, with a
  custom cluster domain secret and with all "new" options at once.
  Finally the RAPI certificate backed up at the start is installed again.

  The local temporary files are closed explicitly once they are no longer
  needed instead of being left open until they are garbage-collected.

  """
  master = qa_config.GetMasterNode()

  # Conflicting options
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for i in conflicting:
    AssertCommand(cmd + i, fail=True)

  # Invalid RAPI certificate
  cmd = ["gnt-cluster", "renew-crypto", "--force",
         "--rapi-certificate=/dev/null"]
  AssertCommand(cmd, fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         pathutils.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
      try:
        AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                       "--rapi-certificate=%s" % tmpcert])
      finally:
        AssertCommand(["rm", "-f", tmpcert])
    finally:
      # Same pattern as in TestClusterInit: always release the local file
      fh.close()

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
      try:
        AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                       "--cluster-domain-secret=%s" % tmpcds])
      finally:
        AssertCommand(["rm", "-f", tmpcds])
    finally:
      cds_fh.close()

    # Normal case
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(["gnt-cluster", "renew-crypto", "--force",
                   "--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
409

    
410

    
411
def TestClusterBurnin():
  """Burnin

  Acquires the configured number of instances (continuing with fewer if
  not enough are available, as long as there is at least one), uploads
  the burnin script to the master node and runs it with options taken
  from the QA configuration. The acquired instances are released again in
  all cases.

  @raise qa_error.Error: if no instance could be acquired

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  acquired = []
  try:
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(wanted):
        acquired.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      print("Not enough instances, continuing anyway.")

    if not acquired:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Assemble the burnin command line
      burnin_cmd = [script]
      burnin_cmd.append("--os=%s" % qa_config.get("os"))
      burnin_cmd.append("--minmem-size=%s" %
                        qa_config.get(constants.BE_MINMEM))
      burnin_cmd.append("--maxmem-size=%s" %
                        qa_config.get(constants.BE_MAXMEM))
      burnin_cmd.append("--disk-size=%s" % ",".join(qa_config.get("disk")))
      burnin_cmd.append("--disk-growth=%s" %
                        ",".join(qa_config.get("disk-growth")))
      burnin_cmd.append("--disk-template=%s" % disk_template)

      if parallel:
        burnin_cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        burnin_cmd.append("--http-check")
      if do_rename:
        burnin_cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        burnin_cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        burnin_cmd.append("--no-reboot")

      # Instance names come last
      burnin_cmd.extend([entry["name"] for entry in acquired])

      # Run burnin
      AssertCommand(burnin_cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for entry in acquired:
      qa_config.ReleaseInstance(entry)
465

    
466

    
467
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs the master failover on another node and then on the original
  master node again. The other node is released afterwards.

  """
  failover_cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)
  try:
    # First to the other node, then back to the original master node
    for target in (failovermaster, master):
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
479

    
480

    
481
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on another node, runs the master
  failover on that node and checks that the drain file is gone on both
  nodes afterwards, before and after failing back to the original master.

  All commands involving the acquired node run inside the try block, so
  the node is released even if one of the preparatory checks fails, and
  it is no longer used after having been released.

  """
  drain_check = ["test", "-f", pathutils.JOB_QUEUE_DRAIN_FILE]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  cmd = ["gnt-cluster", "master-failover"]
  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", pathutils.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)

    # The queue must still not be drained on either node
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)
  finally:
    qa_config.ReleaseNode(failovermaster)
509

    
510

    
511
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a fresh UUID to the master node, distributes
  it with C{gnt-cluster copyfile} and checks its content on all nodes.
  The file is removed from all nodes afterwards and the local temporary
  file is closed explicitly instead of being left open.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    f.flush()
    f.seek(0)

    # Upload file to master node
    testname = qa_utils.UploadFile(master["primary"], f.name)
    try:
      # Copy file to all nodes
      AssertCommand(["gnt-cluster", "copyfile", testname])
      _CheckFileOnAllNodes(testname, uniqueid)
    finally:
      _RemoveFileFromAllNodes(testname)
  finally:
    # Same pattern as in TestClusterInit: always release the local file
    f.close()
531

    
532

    
533
def TestClusterCommand():
  """gnt-cluster command

  Uses C{gnt-cluster command} to write a fresh UUID into a file below
  C{/tmp} and checks the content of that file on all nodes. The file is
  removed from all nodes afterwards.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()

  # Shell snippet passed to "gnt-cluster command": the echo arguments are
  # quoted, the redirection is appended unquoted
  echo_part = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_snippet = "%s >%s" % (echo_part, rfile)
  full_cmd = utils.ShellQuoteArgs(["gnt-cluster", "command",
                                   remote_snippet])

  try:
    AssertCommand(full_cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
546

    
547

    
548
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with C{--yes-do-it} and expects it to succeed.

  """
  destroy_cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(destroy_cmd)
551

    
552

    
553
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command and expects it to succeed.

  """
  repair_cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(repair_cmd)