Statistics
| Branch: | Tag: | Revision:

root / qa / qa_cluster.py @ 304d9f02

History | View | Annotate | Download (17 kB)

1
#
2
#
3

    
4
# Copyright (C) 2007, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Cluster related QA tests.
23

24
"""
25

    
26
import tempfile
27
import os.path
28

    
29
from ganeti import constants
30
from ganeti import compat
31
from ganeti import utils
32
from ganeti import pathutils
33

    
34
import qa_config
35
import qa_utils
36
import qa_error
37

    
38
from qa_utils import AssertEqual, AssertCommand, GetCommandOutput
39

    
40

    
41
#: cluster verify command
42
_CLUSTER_VERIFY = ["gnt-cluster", "verify"]
43

    
44

    
45
def _RemoveFileFromAllNodes(filename):
  """Deletes the given file on every node of the QA configuration.

  The removal is done with "rm -f" through AssertCommand, once per entry of
  the "nodes" configuration value.

  @param filename: path of the file to delete on each node

  """
  rm_cmd = ["rm", "-f", filename]
  for target in qa_config.get("nodes"):
    AssertCommand(rm_cmd, node=target)
51

    
52

    
53
def _CheckFileOnAllNodes(filename, content):
  """Asserts that a file has the expected content on every node.

  The file is read with "cat" on the primary name of each configured node and
  the command output is compared with C{content} using AssertEqual.

  @param filename: path of the file to read on each node
  @param content: output expected from "cat" for that file

  """
  cat_cmd = utils.ShellQuoteArgs(["cat", filename])
  for target in qa_config.get("nodes"):
    actual = GetCommandOutput(target["primary"], cat_cmd)
    AssertEqual(actual, content)
60

    
61

    
62
# data for testing failures due to bad keys/values for disk parameters
63
_FAIL_PARAMS = ["nonexistent:resync-rate=1",
64
                "drbd:nonexistent=1",
65
                "drbd:resync-rate=invalid",
66
                ]
67

    
68

    
69
def TestClusterInitDisk():
  """gnt-cluster init -D

  Runs "gnt-cluster init -D" with the configured cluster name once for every
  entry of _FAIL_PARAMS and expects each invocation to fail.

  """
  cluster_name = qa_config.get("name")
  base_cmd = ["gnt-cluster", "init", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(base_cmd + [bad_param, cluster_name], fail=True)
74

    
75

    
76
def TestClusterInit(rapi_user, rapi_secret):
  """gnt-cluster init

  Installs a RAPI users file on the master node, runs "gnt-cluster init" with
  the options found in the QA configuration and afterwards applies the
  configured hypervisor, backend, OS and per-OS hypervisor parameters.

  @param rapi_user: user name written to the RAPI users file
  @param rapi_secret: password written to the RAPI users file

  """
  master = qa_config.GetMasterNode()
  rapi_users_path = pathutils.RAPI_USERS_FILE

  # The RAPI credentials are put in place before the cluster is initialized
  cred_file = tempfile.NamedTemporaryFile()
  try:
    cred_file.write("%s %s write\n" % (rapi_user, rapi_secret))
    cred_file.flush()

    uploaded = qa_utils.UploadFile(master["primary"], cred_file.name)
    try:
      AssertCommand(["mkdir", "-p", os.path.dirname(rapi_users_path)])
      AssertCommand(["mv", uploaded, rapi_users_path])
    finally:
      # Only has an effect if the uploaded copy was not moved away
      AssertCommand(["rm", "-f", uploaded])
  finally:
    cred_file.close()

  # Build the "gnt-cluster init" command line
  init_cmd = [
    "gnt-cluster", "init",
    "--primary-ip-version=%d" % qa_config.get("primary_ip_version", 4),
    "--enabled-hypervisors=%s" % ",".join(qa_config.GetEnabledHypervisors()),
    ]

  # Instance specs; values which are unset (or otherwise false) are skipped
  spec_types = [
    "mem-size",
    "disk-size",
    "disk-count",
    "cpu-count",
    "nic-count",
    ]
  for spec_type in spec_types:
    cfg_prefix = "ispec_%s_" % spec_type.replace("-", "_")
    for bound in ["min", "max", "std"]:
      value = qa_config.get(cfg_prefix + bound, None)
      if value:
        init_cmd.append("--specs-%s=%s=%d" % (spec_type, bound, value))

  secondary_ip = master.get("secondary", None)
  if secondary_ip:
    init_cmd.append("--secondary-ip=%s" % secondary_ip)

  # The configured bridge is also used as the master network device
  bridge = qa_config.get("bridge", None)
  if bridge:
    init_cmd.extend([
      "--bridge=%s" % bridge,
      "--master-netdev=%s" % bridge,
      ])

  init_cmd.append(qa_config.get("name"))
  AssertCommand(init_cmd)

  # Collect the arguments for a single "gnt-cluster modify" call
  modify_args = []

  # hypervisor parameter modifications
  hv_cfg = qa_config.get("hypervisor-parameters", {})
  for hv_name, hv_params in hv_cfg.items():
    modify_args.extend(["-H", "%s:%s" % (hv_name, hv_params)])

  # backend parameter modifications
  be_params = qa_config.get("backend-parameters", "")
  if be_params:
    modify_args.extend(["-B", be_params])

  # The command is only run if there is something to modify
  if modify_args:
    AssertCommand(["gnt-cluster", "modify"] + modify_args)

  # OS parameters
  os_cfg = qa_config.get("os-parameters", {})
  for os_name, os_params in os_cfg.items():
    AssertCommand(["gnt-os", "modify", "-O", os_params, os_name])

  # OS hypervisor parameters
  os_hv_cfg = qa_config.get("os-hvp", {})
  for os_name, per_hv in os_hv_cfg.items():
    for hv_name, hv_params in per_hv.items():
      hv_arg = "%s:%s" % (hv_name, hv_params)
      AssertCommand(["gnt-os", "modify", "-H", hv_arg, os_name])
147

    
148

    
149
def TestClusterRename():
  """gnt-cluster rename

  Renames the cluster to the configured "rename" name and back to its
  original name, running a cluster verification after each rename. If no
  "rename" entry is configured a message is printed and nothing is done.

  """
  old_name = qa_config.get("name")
  new_name = qa_config.get("rename", None)
  if new_name is None:
    # A parenthesised single argument prints the same as the bare statement
    print(qa_utils.FormatError('"rename" entry is missing'))
    return

  rename_cmd = ["gnt-cluster", "rename", "-f"]
  for target_name in [new_name, old_name]:
    AssertCommand(rename_cmd + [target_name])
    AssertCommand(_CLUSTER_VERIFY)
166

    
167

    
168
def TestClusterOob():
  """out-of-band framework

  Sets the cluster-wide "oob_program" node parameter to a non-existing path
  and to a file with mode 0400, expecting "gnt-cluster verify" to fail in both
  cases, then changes the mode to 0500 and expects verification to succeed.

  The "oob_program" parameter is reset in a "finally" clause so that a failed
  assertion does not leave the cluster pointing at a bogus program. Should the
  reset itself fail, its error replaces the one from the test body.

  """
  oob_path_exists = "/tmp/ganeti-qa-oob-does-exist-%s" % utils.NewUUID()

  AssertCommand(_CLUSTER_VERIFY)

  try:
    # Program path which does not exist
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program=/tmp/ganeti-qa-oob-does-not-exist-%s" %
                   utils.NewUUID()])

    AssertCommand(_CLUSTER_VERIFY, fail=True)

    # Create the file with mode 0400 and distribute it
    AssertCommand(["touch", oob_path_exists])
    AssertCommand(["chmod", "0400", oob_path_exists])
    AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

    try:
      AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                     "oob_program=%s" % oob_path_exists])

      # Verification is still expected to fail with mode 0400
      AssertCommand(_CLUSTER_VERIFY, fail=True)

      AssertCommand(["chmod", "0500", oob_path_exists])
      AssertCommand(["gnt-cluster", "copyfile", oob_path_exists])

      AssertCommand(_CLUSTER_VERIFY)
    finally:
      AssertCommand(["gnt-cluster", "command", "rm", oob_path_exists])
  finally:
    # Always restore an empty oob_program, even after a failed assertion
    AssertCommand(["gnt-cluster", "modify", "--node-parameters",
                   "oob_program="])
198

    
199

    
200
def TestClusterEpo():
  """gnt-cluster epo

  Checks that invalid "gnt-cluster epo" invocations are rejected, then runs
  "gnt-cluster epo -f --all" and "gnt-cluster epo --on -f --all", asserting
  after each that every instance reports the expected status.

  """
  master_name = qa_config.GetMasterNode()["primary"]
  instance_status_cmd = "gnt-instance list --no-headers -o status"

  def _AssertAllLines(cmd, expected):
    """Asserts that each output line of C{cmd} on the master is C{expected}.

    """
    output = GetCommandOutput(master_name, cmd)
    matches = compat.all(line == expected for line in output.splitlines())
    AssertEqual(matches, True)

  # Assert that OOB is unavailable for all nodes
  _AssertAllLines("gnt-node list --verbose --no-headers -o powered",
                  "(unavail)")

  epo_cmd = ["gnt-cluster", "epo"]

  # Conflicting
  AssertCommand(epo_cmd + ["--groups", "--all"], fail=True)
  # --all doesn't expect arguments
  AssertCommand(epo_cmd + ["--all", "some_arg"], fail=True)

  # Unless --all is given master is not allowed to be in the list
  AssertCommand(epo_cmd + ["-f", master_name], fail=True)

  # This shouldn't fail
  AssertCommand(epo_cmd + ["-f", "--all"])

  # All instances should have been stopped now; the status is ERROR_down
  # because the instance is stopped but not recorded as such
  _AssertAllLines(instance_status_cmd, "ERROR_down")

  # Now start everything again
  AssertCommand(epo_cmd + ["--on", "-f", "--all"])

  # All instances should have been started now
  _AssertAllLines(instance_status_cmd, "running")
237

    
238

    
239
def TestClusterVerify():
  """gnt-cluster verify

  Runs "gnt-cluster verify" followed by "gnt-cluster verify-disks".

  """
  for cmd in [_CLUSTER_VERIFY, ["gnt-cluster", "verify-disks"]]:
    AssertCommand(cmd)
243

    
244

    
245
def TestJobqueue():
  """gnt-debug test-jobqueue

  Runs the command through AssertCommand without further options.

  """
  cmd = ["gnt-debug", "test-jobqueue"]
  AssertCommand(cmd)
248

    
249

    
250
def TestDelay(node):
  """gnt-debug delay

  Runs "gnt-debug delay" with a duration argument of "1" three times: without
  options, with "--no-master" and with "--no-master -n <node>".

  @param node: node entry whose "primary" name is passed to "-n"

  """
  base_cmd = ["gnt-debug", "delay"]
  option_sets = [
    [],
    ["--no-master"],
    ["--no-master", "-n", node["primary"]],
    ]
  for options in option_sets:
    AssertCommand(base_cmd + options + ["1"])
256

    
257

    
258
def TestClusterReservedLvs():
  """gnt-cluster reserved lvs

  Creates the logical volume "xenvg/qa-test" and checks that "gnt-cluster
  verify" fails while the volume is not matched by the "--reserved-lvs"
  setting and succeeds while it is. The volume is removed at the end.

  """
  def _SetReserved(value):
    """Builds the command setting the reserved LVs to C{value}.

    """
    return ["gnt-cluster", "modify", "--reserved-lvs", value]

  # Each step is (command, whether the command is expected to fail)
  steps = [
    (_CLUSTER_VERIFY, False),
    (_SetReserved(""), False),
    (["lvcreate", "-L1G", "-nqa-test", "xenvg"], False),
    (_CLUSTER_VERIFY, True),
    (_SetReserved("xenvg/qa-test,.*/other-test"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReserved(".*/qa-.*"), False),
    (_CLUSTER_VERIFY, False),
    (_SetReserved(""), False),
    (_CLUSTER_VERIFY, True),
    (["lvremove", "-f", "xenvg/qa-test"], False),
    (_CLUSTER_VERIFY, False),
    ]

  for cmd, must_fail in steps:
    AssertCommand(cmd, fail=must_fail)
276

    
277

    
278
def TestClusterModifyEmpty():
  """gnt-cluster modify

  Expects "gnt-cluster modify" without any options to fail.

  """
  cmd = ["gnt-cluster", "modify"]
  AssertCommand(cmd, fail=True)
281

    
282

    
283
def TestClusterModifyDisk():
  """gnt-cluster modify -D

  Runs "gnt-cluster modify -D" once for every entry of _FAIL_PARAMS and
  expects each invocation to fail.

  """
  base_cmd = ["gnt-cluster", "modify", "-D"]
  for bad_param in _FAIL_PARAMS:
    AssertCommand(base_cmd + [bad_param], fail=True)
287

    
288

    
289
def TestClusterModifyBe():
  """gnt-cluster modify -B

  Changes the maxmem, minmem, vcpus and auto_balance backend parameters,
  checking via "gnt-cluster info" that valid values are applied and that
  invalid values are rejected without changing the current setting.
  Finally the backend parameters from the QA configuration, if any, are
  applied again.

  """
  def _Modify(params):
    """Builds the command setting backend parameters to C{params}.

    """
    return ["gnt-cluster", "modify", "-B", params]

  def _InfoShows(line):
    """Builds the command grepping "gnt-cluster info" for C{line}.

    """
    return ["sh", "-c", "gnt-cluster info|grep '^ *%s$'" % line]

  # Each step is (command, whether the command is expected to fail)
  steps = [
    # max/min mem
    (_Modify("maxmem=256"), False),
    (_InfoShows("maxmem: 256"), False),
    (_Modify("minmem=256"), False),
    (_InfoShows("minmem: 256"), False),
    (_Modify("maxmem=a"), True),
    (_InfoShows("maxmem: 256"), False),
    (_Modify("minmem=a"), True),
    (_InfoShows("minmem: 256"), False),
    (_Modify("maxmem=128,minmem=128"), False),
    (_InfoShows("maxmem: 128"), False),
    (_InfoShows("minmem: 128"), False),
    # vcpus
    (_Modify("vcpus=4"), False),
    (_InfoShows("vcpus: 4"), False),
    (_Modify("vcpus=a"), True),
    (_Modify("vcpus=1"), False),
    (_InfoShows("vcpus: 1"), False),
    # auto_balance
    (_Modify("auto_balance=False"), False),
    (_InfoShows("auto_balance: False"), False),
    (_Modify("auto_balance=1"), True),
    (_Modify("auto_balance=True"), False),
    (_InfoShows("auto_balance: True"), False),
    ]

  for cmd, must_fail in steps:
    AssertCommand(cmd, fail=must_fail)

  # redo the original-requested BE parameters, if any
  configured = qa_config.get("backend-parameters", "")
  if configured:
    AssertCommand(_Modify(configured))
323

    
324

    
325
def TestClusterInfo():
  """gnt-cluster info

  Runs the command through AssertCommand without further options.

  """
  cmd = ["gnt-cluster", "info"]
  AssertCommand(cmd)
328

    
329

    
330
def TestClusterRedistConf():
  """gnt-cluster redist-conf

  Runs the command through AssertCommand without further options.

  """
  cmd = ["gnt-cluster", "redist-conf"]
  AssertCommand(cmd)
333

    
334

    
335
def TestClusterGetmaster():
  """gnt-cluster getmaster

  Runs the command through AssertCommand without further options.

  """
  cmd = ["gnt-cluster", "getmaster"]
  AssertCommand(cmd)
338

    
339

    
340
def TestClusterVersion():
  """gnt-cluster version

  Runs the command through AssertCommand without further options.

  """
  cmd = ["gnt-cluster", "version"]
  AssertCommand(cmd)
343

    
344

    
345
def TestClusterRenewCrypto():
  """gnt-cluster renew-crypto

  Checks that conflicting options and an invalid RAPI certificate are
  rejected, then renews the cluster crypto material with a custom RAPI
  certificate, a custom cluster domain secret and with newly generated
  values. The RAPI certificate backed up at the beginning is restored at
  the end of the successful path; the backup file is always removed.

  """
  master = qa_config.GetMasterNode()
  renew_cmd = ["gnt-cluster", "renew-crypto", "--force"]

  # Conflicting options
  cmd = renew_cmd + ["--new-cluster-certificate", "--new-confd-hmac-key"]
  conflicting = [
    ["--new-rapi-certificate", "--rapi-certificate=/dev/null"],
    ["--new-cluster-domain-secret", "--cluster-domain-secret=/dev/null"],
    ]
  for extra in conflicting:
    AssertCommand(cmd + extra, fail=True)

  # Invalid RAPI certificate
  AssertCommand(renew_cmd + ["--rapi-certificate=/dev/null"], fail=True)

  rapi_cert_backup = qa_utils.BackupFile(master["primary"],
                                         pathutils.RAPI_CERT_FILE)
  try:
    # Custom RAPI certificate
    fh = tempfile.NamedTemporaryFile()
    try:
      # Ensure certificate doesn't cause "gnt-cluster verify" to complain
      validity = constants.SSL_CERT_EXPIRATION_WARN * 3

      utils.GenerateSelfSignedSslCert(fh.name, validity=validity)

      tmpcert = qa_utils.UploadFile(master["primary"], fh.name)
    finally:
      # The local copy is not needed once uploaded; don't leak the file
      fh.close()

    try:
      AssertCommand(renew_cmd + ["--rapi-certificate=%s" % tmpcert])
    finally:
      AssertCommand(["rm", "-f", tmpcert])

    # Custom cluster domain secret
    cds_fh = tempfile.NamedTemporaryFile()
    try:
      cds_fh.write(utils.GenerateSecret())
      cds_fh.write("\n")
      cds_fh.flush()

      tmpcds = qa_utils.UploadFile(master["primary"], cds_fh.name)
    finally:
      # As above, release the local temporary file right after the upload
      cds_fh.close()

    try:
      AssertCommand(renew_cmd + ["--cluster-domain-secret=%s" % tmpcds])
    finally:
      AssertCommand(["rm", "-f", tmpcds])

    # Normal case
    AssertCommand(renew_cmd +
                  ["--new-cluster-certificate", "--new-confd-hmac-key",
                   "--new-rapi-certificate", "--new-cluster-domain-secret"])

    # Restore RAPI certificate
    AssertCommand(renew_cmd + ["--rapi-certificate=%s" % rapi_cert_backup])
  finally:
    AssertCommand(["rm", "-f", rapi_cert_backup])
405

    
406

    
407
def TestClusterBurnin():
  """Burnin

  Acquires up to "burnin-instances" instances, uploads "../tools/burnin" to
  the master node and runs it with options derived from the QA
  configuration. The uploaded script is removed and all acquired instances
  are released afterwards, whether or not the run succeeded.

  """
  master = qa_config.GetMasterNode()

  options = qa_config.get("options", {})
  disk_template = options.get("burnin-disk-template", "drbd")
  parallel = options.get("burnin-in-parallel", False)
  check_inst = options.get("burnin-check-instances", False)
  do_rename = options.get("burnin-rename", "")
  do_reboot = options.get("burnin-reboot", True)
  reboot_types = options.get("reboot-types", constants.REBOOT_TYPES)

  # Get as many instances as we need
  instances = []
  try:
    # The except clause is nested, not combined with the outer finally
    try:
      wanted = qa_config.get("options", {}).get("burnin-instances", 1)
      for _ in range(wanted):
        instances.append(qa_config.AcquireInstance())
    except qa_error.OutOfInstancesError:
      # Burnin runs with however many instances were acquired so far
      print("Not enough instances, continuing anyway.")

    if not instances:
      raise qa_error.Error("Burnin needs at least one instance")

    script = qa_utils.UploadFile(master["primary"], "../tools/burnin")
    try:
      # Run burnin
      cmd = [
        script,
        "--os=%s" % qa_config.get("os"),
        "--minmem-size=%s" % qa_config.get(constants.BE_MINMEM),
        "--maxmem-size=%s" % qa_config.get(constants.BE_MAXMEM),
        "--disk-size=%s" % ",".join(qa_config.get("disk")),
        "--disk-growth=%s" % ",".join(qa_config.get("disk-growth")),
        "--disk-template=%s" % disk_template,
        ]
      if parallel:
        cmd.extend(["--parallel", "--early-release"])
      if check_inst:
        cmd.append("--http-check")
      if do_rename:
        cmd.append("--rename=%s" % do_rename)
      if do_reboot:
        cmd.append("--reboot-types=%s" % ",".join(reboot_types))
      else:
        cmd.append("--no-reboot")
      cmd.extend([inst["name"] for inst in instances])
      AssertCommand(cmd)
    finally:
      AssertCommand(["rm", "-f", script])

  finally:
    for inst in instances:
      qa_config.ReleaseInstance(inst)
461

    
462

    
463
def TestClusterMasterFailover():
  """gnt-cluster master-failover

  Runs "gnt-cluster master-failover" on a second node and then on the
  original master node. The second node is released afterwards.

  """
  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  failover_cmd = ["gnt-cluster", "master-failover"]
  try:
    # First fail over to the other node, then back to the original master
    for target in [failovermaster, master]:
      AssertCommand(failover_cmd, node=target)
  finally:
    qa_config.ReleaseNode(failovermaster)
475

    
476

    
477
def TestClusterMasterFailoverWithDrainedQueue():
  """gnt-cluster master-failover with drained queue

  Creates the job queue drain file on a second node, runs
  "gnt-cluster master-failover" on that node and checks that the drain file
  is gone afterwards, then fails back to the original master.

  All commands run after the second node was acquired are inside the
  "try" block, so the node is released even if one of the initial checks
  fails.

  """
  drain_check = ["test", "-f", pathutils.JOB_QUEUE_DRAIN_FILE]
  cmd = ["gnt-cluster", "master-failover"]

  master = qa_config.GetMasterNode()
  failovermaster = qa_config.AcquireNode(exclude=master)

  try:
    # Ensure queue is not drained
    for node in [master, failovermaster]:
      AssertCommand(drain_check, node=node, fail=True)

    # Drain queue on failover master
    AssertCommand(["touch", pathutils.JOB_QUEUE_DRAIN_FILE],
                  node=failovermaster)

    AssertCommand(drain_check, node=failovermaster)
    AssertCommand(cmd, node=failovermaster)
    # No "node" argument: runs on AssertCommand's default node
    AssertCommand(drain_check, fail=True)
    AssertCommand(drain_check, node=failovermaster, fail=True)

    # Back to original master node
    AssertCommand(cmd, node=master)
  finally:
    qa_config.ReleaseNode(failovermaster)

  # The drain file must still be absent after failing back
  AssertCommand(drain_check, fail=True)
  AssertCommand(drain_check, node=failovermaster, fail=True)
505

    
506

    
507
def TestClusterCopyfile():
  """gnt-cluster copyfile

  Uploads a file containing a fresh UUID to the master node, distributes it
  with "gnt-cluster copyfile" and checks its content on all nodes. The file
  is removed from all nodes afterwards.

  """
  master = qa_config.GetMasterNode()

  uniqueid = utils.NewUUID()

  # Create temporary file and upload it to the master node
  f = tempfile.NamedTemporaryFile()
  try:
    f.write(uniqueid)
    # Make sure the content is on disk; the upload is given only the name
    f.flush()

    testname = qa_utils.UploadFile(master["primary"], f.name)
  finally:
    # The local copy is no longer needed; don't leak the temporary file
    f.close()

  try:
    # Copy file to all nodes
    AssertCommand(["gnt-cluster", "copyfile", testname])
    _CheckFileOnAllNodes(testname, uniqueid)
  finally:
    _RemoveFileFromAllNodes(testname)
527

    
528

    
529
def TestClusterCommand():
  """gnt-cluster command

  Uses "gnt-cluster command" to write a fresh UUID into a file below /tmp
  and checks the file's content on all nodes. The file is removed from all
  nodes afterwards.

  """
  uniqueid = utils.NewUUID()
  rfile = "/tmp/gnt%s" % utils.NewUUID()

  # Command passed to "gnt-cluster command", including the output redirection
  echo_cmd = utils.ShellQuoteArgs(["echo", "-n", uniqueid])
  remote_cmd = "%s >%s" % (echo_cmd, rfile)

  # The remote command is passed as one quoted argument
  cmd = utils.ShellQuoteArgs(["gnt-cluster", "command", remote_cmd])

  try:
    AssertCommand(cmd)
    _CheckFileOnAllNodes(rfile, uniqueid)
  finally:
    _RemoveFileFromAllNodes(rfile)
542

    
543

    
544
def TestClusterDestroy():
  """gnt-cluster destroy

  Runs the command with "--yes-do-it" through AssertCommand.

  """
  cmd = ["gnt-cluster", "destroy", "--yes-do-it"]
  AssertCommand(cmd)
547

    
548

    
549
def TestClusterRepairDiskSizes():
  """gnt-cluster repair-disk-sizes

  Runs the command through AssertCommand without further options.

  """
  cmd = ["gnt-cluster", "repair-disk-sizes"]
  AssertCommand(cmd)